{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.951881014873141, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1947.0, "completions/mean_length": 597.8717041015625, "completions/mean_terminated_length": 530.21728515625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.009332166812481774, "grad_norm": 0.1767578125, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 552565.0, "reward": 0.5267857313156128, "reward_std": 0.2720065116882324, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 1 }, { "clip_ratio/high_max": 0.0014833615286988788, "clip_ratio/high_mean": 0.00045727290410013666, "clip_ratio/low_mean": 0.000256149965196073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007134228812901711, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3467.0, "completions/mean_length": 577.5647583007812, "completions/mean_terminated_length": 549.8605346679688, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.018664333624963548, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 1128647.0, "reward": 0.4732142984867096, "reward_std": 0.2450285404920578, "rewards/verify_math_reward/mean": 0.4732142984867096, "rewards/verify_math_reward/std": 0.4995608925819397, "step": 2 }, { "clip_ratio/high_max": 0.001585116649948759, "clip_ratio/high_mean": 0.0005085156053610262, "clip_ratio/low_mean": 0.0002956197542971495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000804135374892212, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 587.3873291015625, "completions/mean_terminated_length": 543.7774047851562, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.02799650043744532, "grad_norm": 0.1572265625, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 1708434.0, "reward": 0.4988839626312256, "reward_std": 0.23973384499549866, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 3 }, { "clip_ratio/high_max": 0.0016634631665510824, "clip_ratio/high_mean": 0.0005582791625329264, "clip_ratio/low_mean": 0.000263425162643216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008217043141485192, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 557.1004638671875, "completions/mean_terminated_length": 533.24267578125, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.037328667249927096, "grad_norm": 0.171875, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 2262452.0, "reward": 0.5424107313156128, "reward_std": 0.25761085748672485, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 4 }, { "clip_ratio/high_max": 0.001812106933357427, "clip_ratio/high_mean": 0.0006231880311133864, "clip_ratio/low_mean": 0.00030001021900716296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009231982385244919, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 584.5267944335938, "completions/mean_terminated_length": 540.88134765625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.04666083406240887, "grad_norm": 0.171875, "learning_rate": 1e-06, "loss": -0.0199, "num_tokens": 2824700.0, "reward": 0.494419664144516, "reward_std": 0.27577054500579834, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 5 }, { "clip_ratio/high_max": 0.0014002122825331753, "clip_ratio/high_mean": 0.0004531991592102713, "clip_ratio/low_mean": 0.00042794430373760406, "clip_ratio/low_min": 1.1819212886621244e-05, "clip_ratio/region_mean": 0.0008811434686322173, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 638.2701416015625, "completions/mean_terminated_length": 559.3264770507812, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.05599300087489064, "grad_norm": 0.15234375, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 3405094.0, "reward": 0.494419664144516, "reward_std": 0.266407310962677, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 6 }, { "clip_ratio/high_max": 0.001297875107411528, "clip_ratio/high_mean": 0.00038535677481377206, "clip_ratio/low_mean": 0.0003542163467500359, "clip_ratio/low_min": 1.504573901911499e-05, "clip_ratio/region_mean": 0.000739573106784519, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 546.1830444335938, "completions/mean_terminated_length": 514.2026977539062, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.06532516768737241, "grad_norm": 0.1748046875, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 3960162.0, "reward": 0.5267857313156128, "reward_std": 0.2519356906414032, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 7 }, { "clip_ratio/high_max": 0.0013917427995693288, "clip_ratio/high_mean": 0.00041493226422062435, "clip_ratio/low_mean": 0.00031620567665413546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000731137938601023, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 617.8616333007812, "completions/mean_terminated_length": 582.5704345703125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.07465733449985419, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 4551294.0, "reward": 0.4888392984867096, "reward_std": 0.2438676804304123, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 8 }, { "clip_ratio/high_max": 0.0018802964186761528, "clip_ratio/high_mean": 0.0005461828970965144, "clip_ratio/low_mean": 0.00031180268240404985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008579855916650558, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 632.1920166015625, "completions/mean_terminated_length": 557.1493530273438, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.08398950131233596, "grad_norm": 0.166015625, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 5128890.0, "reward": 0.5089285969734192, "reward_std": 0.2608848214149475, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 9 }, { "clip_ratio/high_max": 0.0015824930942471838, "clip_ratio/high_mean": 0.0004886576807621168, "clip_ratio/low_mean": 0.00035787165802503296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008465293458357337, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 633.857177734375, "completions/mean_terminated_length": 554.812744140625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.09332166812481774, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 5710146.0, "reward": 0.4720982313156128, "reward_std": 0.24513548612594604, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 10 }, { "clip_ratio/high_max": 0.0017227220623681205, "clip_ratio/high_mean": 0.0004794969561316975, "clip_ratio/low_mean": 0.0002942044511655695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007737014029771672, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 633.8248291015625, "completions/mean_terminated_length": 574.87744140625, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.1026538349372995, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 6300957.0, "reward": 0.4486607313156128, "reward_std": 0.24141353368759155, "rewards/verify_math_reward/mean": 0.4486607015132904, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 11 }, { "clip_ratio/high_max": 0.0016666346273268573, "clip_ratio/high_mean": 0.00045902808733444544, "clip_ratio/low_mean": 0.0003052549761264345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007642830696568126, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 589.5580444335938, "completions/mean_terminated_length": 537.934326171875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 0.11198600174978128, "grad_norm": 0.1689453125, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 6878433.0, "reward": 0.486607164144516, "reward_std": 0.23912441730499268, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.5000997185707092, "step": 12 }, { "clip_ratio/high_max": 0.0014551977310475195, "clip_ratio/high_mean": 0.0004775686838911497, "clip_ratio/low_mean": 0.0002972270609689076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007747957361061708, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3355.0, "completions/mean_length": 634.234375, "completions/mean_terminated_length": 591.206787109375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.12131816856226305, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 7489643.0, "reward": 0.5256696939468384, "reward_std": 0.24795658886432648, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 13 }, { "clip_ratio/high_max": 0.00174857863021316, "clip_ratio/high_mean": 0.000491976448074638, "clip_ratio/low_mean": 0.0003782003570904635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008701768101673224, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 585.6015625, "completions/mean_terminated_length": 521.776123046875, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.13065033537474482, "grad_norm": 0.1513671875, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 8044918.0, "reward": 0.4832589626312256, "reward_std": 0.2616012990474701, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 14 }, { "clip_ratio/high_max": 0.0016589798287895974, "clip_ratio/high_mean": 0.0005142171869465528, "clip_ratio/low_mean": 0.0002859190755089003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008001362625691399, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 623.09375, "completions/mean_terminated_length": 579.9276733398438, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.1399825021872266, "grad_norm": 0.1552734375, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 8651618.0, "reward": 0.5390625, "reward_std": 0.2433338314294815, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 15 }, { "clip_ratio/high_max": 0.0013559699900724809, "clip_ratio/high_mean": 0.000439762489804707, "clip_ratio/low_mean": 0.0003798546824782534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008196171775125549, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 607.515625, "completions/mean_terminated_length": 548.120361328125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.14931466899970838, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 9225976.0, "reward": 0.527901828289032, "reward_std": 0.23308269679546356, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 16 }, { "clip_ratio/high_max": 0.0014293352942331694, "clip_ratio/high_mean": 0.00041319123761240917, "clip_ratio/low_mean": 0.00031763293384301505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007308241583814379, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3419.0, "completions/mean_length": 598.8002319335938, "completions/mean_terminated_length": 559.3284301757812, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.15864683581219013, "grad_norm": 0.15625, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 9807165.0, "reward": 0.5256696939468384, "reward_std": 0.27177828550338745, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 17 }, { "clip_ratio/high_max": 0.0014759054902242497, "clip_ratio/high_mean": 0.00043099112144773244, "clip_ratio/low_mean": 0.00032991902389767347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007609101417074271, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 619.7288208007812, "completions/mean_terminated_length": 568.5492553710938, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.1679790026246719, "grad_norm": 0.1552734375, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 10390426.0, "reward": 0.4966517984867096, "reward_std": 0.2521638870239258, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 18 }, { "clip_ratio/high_max": 0.0020546681134874234, "clip_ratio/high_mean": 0.0006518496816170227, "clip_ratio/low_mean": 0.00034170514368270233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009935548205248779, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 602.8873291015625, "completions/mean_terminated_length": 555.469482421875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.1773111694371537, "grad_norm": 0.16015625, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 10975917.0, "reward": 0.5625, "reward_std": 0.3053692877292633, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 19 }, { "clip_ratio/high_max": 0.0016187741694011493, "clip_ratio/high_mean": 0.0005318163211995852, "clip_ratio/low_mean": 0.000337265061943981, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008690813920111395, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 548.1707763671875, "completions/mean_terminated_length": 520.235107421875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.18664333624963547, "grad_norm": 0.1533203125, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 11529574.0, "reward": 0.574776828289032, "reward_std": 0.2594951391220093, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 20 }, { "clip_ratio/high_max": 0.0015502021578868153, "clip_ratio/high_mean": 0.0004626288832696446, "clip_ratio/low_mean": 0.00028075454758891283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007433834298353759, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 565.4107666015625, "completions/mean_terminated_length": 525.5620727539062, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.19597550306211722, "grad_norm": 0.1640625, "learning_rate": 1e-06, "loss": -0.0105, "num_tokens": 12077414.0, "reward": 0.5546875, "reward_std": 0.26329195499420166, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 21 }, { "clip_ratio/high_max": 0.00131268548193475, "clip_ratio/high_mean": 0.0003837667984498694, "clip_ratio/low_mean": 0.00024391151930558408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006276783155954035, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2551.0, "completions/mean_length": 624.7924194335938, "completions/mean_terminated_length": 593.520263671875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.205307669874599, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0158, "num_tokens": 12699892.0, "reward": 0.4832589626312256, "reward_std": 0.2300340235233307, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 22 }, { "clip_ratio/high_max": 0.0019214958756492706, "clip_ratio/high_mean": 0.0005548375861508248, "clip_ratio/low_mean": 0.00021802542983095918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007728630353085464, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3787.0, "completions/mean_length": 588.3058471679688, "completions/mean_terminated_length": 536.6636352539062, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.21463983668708078, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 13258102.0, "reward": 0.5926339626312256, "reward_std": 0.21225734055042267, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 23 }, { "clip_ratio/high_max": 0.0016732044668970047, "clip_ratio/high_mean": 0.000507047609971778, "clip_ratio/low_mean": 0.00031541719931738044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008224648172472371, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 639.9029541015625, "completions/mean_terminated_length": 585.0442504882812, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.22397200349956256, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 13858407.0, "reward": 0.4765625298023224, "reward_std": 0.27910587191581726, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 24 }, { "clip_ratio/high_max": 0.0012036838897984126, "clip_ratio/high_mean": 0.00035329613638168667, "clip_ratio/low_mean": 0.0002732304989194745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006265266288210114, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 665.4866333007812, "completions/mean_terminated_length": 591.1653442382812, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.23330417031204434, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 14462091.0, "reward": 0.5133928656578064, "reward_std": 0.22398342192173004, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.5000997185707092, "step": 25 }, { "clip_ratio/high_max": 0.0015261218959494727, "clip_ratio/high_mean": 0.00045390168679659837, "clip_ratio/low_mean": 0.0003453062097378279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000799207899490284, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 638.7600708007812, "completions/mean_terminated_length": 563.8597412109375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.2426363371245261, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 15044068.0, "reward": 0.4720982313156128, "reward_std": 0.25757694244384766, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 26 }, { "clip_ratio/high_max": 0.0017356351945636561, "clip_ratio/high_mean": 0.0005630187088172534, "clip_ratio/low_mean": 0.0004012571356497574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009642758450354449, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 596.5614013671875, "completions/mean_terminated_length": 541.0147705078125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.25196850393700787, "grad_norm": 0.158203125, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 15609363.0, "reward": 0.527901828289032, "reward_std": 0.28294095396995544, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 27 }, { "clip_ratio/high_max": 0.001649338155402802, "clip_ratio/high_mean": 0.0005405367069215572, "clip_ratio/low_mean": 0.0003426419023071503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008831786117298179, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 583.0100708007812, "completions/mean_terminated_length": 551.3615112304688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.26130067074948965, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 16188964.0, "reward": 0.5223214626312256, "reward_std": 0.2360539585351944, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 28 }, { "clip_ratio/high_max": 0.0016810440192784881, "clip_ratio/high_mean": 0.0005199292579618486, "clip_ratio/low_mean": 0.0003285311371428179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008484603981742112, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 637.614990234375, "completions/mean_terminated_length": 574.7352294921875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.27063283756197143, "grad_norm": 0.1552734375, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 16783715.0, "reward": 0.4776785969734192, "reward_std": 0.257693886756897, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 29 }, { "clip_ratio/high_max": 0.0013078466845399817, "clip_ratio/high_mean": 0.0003965248021131629, "clip_ratio/low_mean": 0.000290919498979747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006874442929074576, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2668.0, "completions/mean_length": 574.9520263671875, "completions/mean_terminated_length": 527.155029296875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.2799650043744532, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 17345048.0, "reward": 0.5345982313156128, "reward_std": 0.21191851794719696, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 30 }, { "clip_ratio/high_max": 0.0014755538031749893, "clip_ratio/high_mean": 0.0004436471647295548, "clip_ratio/low_mean": 0.0003437455734456307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007873927415857906, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 659.2879638671875, "completions/mean_terminated_length": 604.7369995117188, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.289297171186935, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 17963306.0, "reward": 0.5145089626312256, "reward_std": 0.22800594568252563, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 31 }, { "clip_ratio/high_max": 0.0016754547441450995, "clip_ratio/high_mean": 0.0004678851930748351, "clip_ratio/low_mean": 0.0003820526953859371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008499379000568297, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 597.9642944335938, "completions/mean_terminated_length": 550.4796752929688, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.29862933799941677, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 18540770.0, "reward": 0.5055803656578064, "reward_std": 0.2404039204120636, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 32 }, { "clip_ratio/high_max": 0.0015293593560272711, "clip_ratio/high_mean": 0.0004095853012131556, "clip_ratio/low_mean": 0.0003093034737275957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007188887730080751, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3211.0, "completions/mean_length": 540.5870971679688, "completions/mean_terminated_length": 508.5563049316406, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.3079615048118985, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 19078904.0, "reward": 0.4966517984867096, "reward_std": 0.2256055474281311, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 33 }, { "clip_ratio/high_max": 0.0015010556344350334, "clip_ratio/high_mean": 0.0004454719312434463, "clip_ratio/low_mean": 0.00043172464529561694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008771965631240164, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 615.6942138671875, "completions/mean_terminated_length": 560.4512329101562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.31729367162438027, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 19656614.0, "reward": 0.53125, "reward_std": 0.2604729235172272, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 34 }, { "clip_ratio/high_max": 0.00146182533717365, "clip_ratio/high_mean": 0.0004877040967130597, "clip_ratio/low_mean": 0.0003603455347729323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008480496271658922, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3128.0, "completions/mean_length": 622.2467041015625, "completions/mean_terminated_length": 563.1021728515625, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.32662583843686205, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 20236363.0, "reward": 0.5758928656578064, "reward_std": 0.24145811796188354, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 35 }, { "clip_ratio/high_max": 0.0013757151491518016, "clip_ratio/high_mean": 0.0004118498437719609, "clip_ratio/low_mean": 0.0003148514662143498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007267013115779264, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 653.3002319335938, "completions/mean_terminated_length": 590.7056884765625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.3359580052493438, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 20849008.0, "reward": 0.5133928656578064, "reward_std": 0.2310871183872223, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 36 }, { "clip_ratio/high_max": 0.0014319893280116958, "clip_ratio/high_mean": 0.00045862419722197956, "clip_ratio/low_mean": 0.000365841522352639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008244657310569892, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3399.0, "completions/mean_length": 592.1953125, "completions/mean_terminated_length": 560.6295166015625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.3452901720618256, "grad_norm": 0.1787109375, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 21423575.0, "reward": 0.4955357313156128, "reward_std": 0.2597554326057434, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 37 }, { "clip_ratio/high_max": 0.0016519977225470939, "clip_ratio/high_mean": 0.0005154066718660033, "clip_ratio/low_mean": 0.00026897135535364214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007843780281291401, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3462.0, "completions/mean_length": 644.5502319335938, "completions/mean_terminated_length": 561.7153930664062, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.3546223388743074, "grad_norm": 0.171875, "learning_rate": 1e-06, "loss": -0.0154, "num_tokens": 22004412.0, "reward": 0.5145089626312256, "reward_std": 0.2486048936843872, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 38 }, { "clip_ratio/high_max": 0.0019011590975424042, "clip_ratio/high_mean": 0.0005794297785541858, "clip_ratio/low_mean": 0.0003370601472170165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000916489914743579, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 596.6417846679688, "completions/mean_terminated_length": 537.0613403320312, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.36395450568678916, "grad_norm": 0.154296875, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 22573427.0, "reward": 0.5111607313156128, "reward_std": 0.2544548213481903, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 39 }, { "clip_ratio/high_max": 0.0013376170936680865, "clip_ratio/high_mean": 0.00036210402311098733, "clip_ratio/low_mean": 0.00029379942054674757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006559034321753643, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 575.0692138671875, "completions/mean_terminated_length": 539.3438110351562, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.37328667249927094, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 23154177.0, "reward": 0.5133928656578064, "reward_std": 0.20117954909801483, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 40 }, { "clip_ratio/high_max": 0.0015395624159282306, "clip_ratio/high_mean": 0.0005284612877858308, "clip_ratio/low_mean": 0.0003896172624990868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009180785582429962, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 670.1361694335938, "completions/mean_terminated_length": 595.9155883789062, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.3826188393117527, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 23778579.0, "reward": 0.4654017984867096, "reward_std": 0.27790629863739014, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 41 }, { "clip_ratio/high_max": 0.0011948465162276989, "clip_ratio/high_mean": 0.0003530940239215852, "clip_ratio/low_mean": 0.0002722036805380412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006252977007079608, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3749.0, "completions/mean_length": 594.966552734375, "completions/mean_terminated_length": 551.4508666992188, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.39195100612423445, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 24357813.0, "reward": 0.5100446939468384, "reward_std": 0.201626718044281, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 42 }, { "clip_ratio/high_max": 0.0017167266614706023, "clip_ratio/high_mean": 0.0005214383163547609, "clip_ratio/low_mean": 0.00025293392923231295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000774372233536269, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 591.9486694335938, "completions/mean_terminated_length": 544.3823852539062, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.4012831729367162, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 24925183.0, "reward": 0.5424107313156128, "reward_std": 0.25145599246025085, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 43 }, { "clip_ratio/high_max": 0.0015150593953876523, "clip_ratio/high_mean": 0.00045249088111631863, "clip_ratio/low_mean": 0.0002036661325064415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006561570080521051, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3314.0, "completions/mean_length": 561.5357666015625, "completions/mean_terminated_length": 505.43310546875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.410615339749198, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 25457167.0, "reward": 0.5870535969734192, "reward_std": 0.22834154963493347, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 44 }, { "clip_ratio/high_max": 0.0018521121382946149, "clip_ratio/high_mean": 0.000525584012393665, "clip_ratio/low_mean": 0.00026987602973349567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007954600423545344, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2604.0, "completions/mean_length": 626.5658569335938, "completions/mean_terminated_length": 575.4869384765625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.4199475065616798, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0236, "num_tokens": 26058714.0, "reward": 0.5379464626312256, "reward_std": 0.24356064200401306, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 45 }, { "clip_ratio/high_max": 0.0013879348462069174, "clip_ratio/high_mean": 0.0004509769371452421, "clip_ratio/low_mean": 0.0003597295803956513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008107065295916982, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 656.5033569335938, "completions/mean_terminated_length": 617.682861328125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.42927967337416156, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 26694429.0, "reward": 0.5379464626312256, "reward_std": 0.2376294881105423, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 46 }, { "clip_ratio/high_max": 0.0016000384803191992, "clip_ratio/high_mean": 0.0004762422627209162, "clip_ratio/low_mean": 0.000366352751825616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008425950154560269, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 634.5982666015625, "completions/mean_terminated_length": 583.6375732421875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.43861184018664334, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 27294677.0, "reward": 0.5011160969734192, "reward_std": 0.2740648686885834, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002780556678772, "step": 47 }, { "clip_ratio/high_max": 0.0015195509613477043, "clip_ratio/high_mean": 0.0004423020870945038, "clip_ratio/low_mean": 0.00026836878760150285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000710670865373686, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 602.3683471679688, "completions/mean_terminated_length": 554.9434814453125, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 0.4479440069991251, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 27879895.0, "reward": 0.4799107313156128, "reward_std": 0.21394869685173035, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 48 }, { "clip_ratio/high_max": 0.0013185698808229063, "clip_ratio/high_mean": 0.0004206940443509666, "clip_ratio/low_mean": 0.0003416846567461107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007623787046213693, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3555.0, "completions/mean_length": 584.9074096679688, "completions/mean_terminated_length": 557.260986328125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.4572761738116069, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 28458316.0, "reward": 0.4776785969734192, "reward_std": 0.2448740452528, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 49 }, { "clip_ratio/high_max": 0.0017849221330834553, "clip_ratio/high_mean": 0.0005301667756612005, "clip_ratio/low_mean": 0.00026941004330183205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007995768182809115, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 627.6607666015625, "completions/mean_terminated_length": 540.35693359375, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.4666083406240887, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 29033124.0, "reward": 0.4933035969734192, "reward_std": 0.23329952359199524, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 50 }, { "clip_ratio/high_max": 0.0016930396777752321, "clip_ratio/high_mean": 0.0004935184060741449, "clip_ratio/low_mean": 0.0002914745394946294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007849929406802403, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3520.0, "completions/mean_length": 637.3013916015625, "completions/mean_terminated_length": 558.3355712890625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.4759405074365704, "grad_norm": 0.1591796875, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 29603434.0, "reward": 0.5691964626312256, "reward_std": 0.2575739920139313, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 51 }, { "clip_ratio/high_max": 0.0015872932563070208, "clip_ratio/high_mean": 0.0005216651053387977, "clip_ratio/low_mean": 0.00039137663725341554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009130417402047897, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 596.5033569335938, "completions/mean_terminated_length": 544.9818725585938, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.4852726742490522, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 30164341.0, "reward": 0.5993303656578064, "reward_std": 0.24558943510055542, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 52 }, { "clip_ratio/high_max": 0.0014443692316490342, "clip_ratio/high_mean": 0.00046944564087425533, "clip_ratio/low_mean": 0.00031746199169901956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007869076530369057, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 574.0580444335938, "completions/mean_terminated_length": 538.3223876953125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.49460484106153396, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 30727585.0, "reward": 0.5223214626312256, "reward_std": 0.25388845801353455, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 53 }, { "clip_ratio/high_max": 0.0015425280453200685, "clip_ratio/high_mean": 0.00047079653768378193, "clip_ratio/low_mean": 0.0003115816055014875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007823781465958746, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 523.1707763671875, "completions/mean_terminated_length": 486.9187927246094, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.5039370078740157, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 31249322.0, "reward": 0.5379464626312256, "reward_std": 0.245513454079628, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364279270172, "step": 54 }, { "clip_ratio/high_max": 0.0019663496750581544, "clip_ratio/high_mean": 0.0007126980458451726, "clip_ratio/low_mean": 0.0002605135380235879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009732115763654292, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 522.3359375, "completions/mean_terminated_length": 498.24383544921875, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.5132691746864976, "grad_norm": 0.1669921875, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 31787151.0, "reward": 0.566964328289032, "reward_std": 0.25960850715637207, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 55 }, { "clip_ratio/high_max": 0.001540494351502275, "clip_ratio/high_mean": 0.00041581053733352746, "clip_ratio/low_mean": 0.000380868444835869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007966789917190908, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 564.703125, "completions/mean_terminated_length": 540.8966064453125, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.5226013414989793, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0157, "num_tokens": 32359957.0, "reward": 0.4910714626312256, "reward_std": 0.21601885557174683, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 56 }, { "clip_ratio/high_max": 0.0012436833549145376, "clip_ratio/high_mean": 0.00035380065605750133, "clip_ratio/low_mean": 0.0002859475207515061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006397481815838546, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2337.0, "completions/mean_length": 563.638427734375, "completions/mean_terminated_length": 519.7333374023438, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.531933508311461, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 32906673.0, "reward": 0.543526828289032, "reward_std": 0.19925953447818756, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 57 }, { "clip_ratio/high_max": 0.0018532251688156975, "clip_ratio/high_mean": 0.0005421158548415406, "clip_ratio/low_mean": 0.0003463318034846452, "clip_ratio/low_min": 1.0109996765095275e-05, "clip_ratio/region_mean": 0.0008884476683306275, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 598.5826416015625, "completions/mean_terminated_length": 539.0352172851562, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.5412656751239429, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 33471427.0, "reward": 0.5267857313156128, "reward_std": 0.24194666743278503, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 58 }, { "clip_ratio/high_max": 0.0015568905928375898, "clip_ratio/high_mean": 0.0004729019640308252, "clip_ratio/low_mean": 0.00026004129472312343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007329432628466748, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 582.3270263671875, "completions/mean_terminated_length": 518.4420166015625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.5505978419364246, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": -0.0135, "num_tokens": 34021800.0, "reward": 0.543526828289032, "reward_std": 0.208427295088768, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 59 }, { "clip_ratio/high_max": 0.0015631410460628103, "clip_ratio/high_mean": 0.0004982794935131096, "clip_ratio/low_mean": 0.0003381057590559067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008363852512047742, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 567.2801513671875, "completions/mean_terminated_length": 519.3789672851562, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.5599300087489064, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 34565939.0, "reward": 0.546875, "reward_std": 0.22668297588825226, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 60 }, { "clip_ratio/high_max": 0.0014611678398068761, "clip_ratio/high_mean": 0.0004804958073236776, "clip_ratio/low_mean": 0.00039822009489398624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008787158944869589, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3113.0, "completions/mean_length": 670.9542846679688, "completions/mean_terminated_length": 588.7531127929688, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.5692621755613881, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 35165122.0, "reward": 0.5078125, "reward_std": 0.2520155608654022, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 61 }, { "clip_ratio/high_max": 0.0016026523899199674, "clip_ratio/high_mean": 0.0004569236366478435, "clip_ratio/low_mean": 0.00040043612898443826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008573597569920821, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 640.9319458007812, "completions/mean_terminated_length": 590.0645141601562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.57859434237387, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.014, "num_tokens": 35770117.0, "reward": 0.4843750298023224, "reward_std": 0.28068071603775024, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 62 }, { "clip_ratio/high_max": 0.0015333942537836265, "clip_ratio/high_mean": 0.000492742325150175, "clip_ratio/low_mean": 0.00027141847272105224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007641608012818324, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 582.247802734375, "completions/mean_terminated_length": 530.5164184570312, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.5879265091863517, "grad_norm": 0.1533203125, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 36317219.0, "reward": 0.578125, "reward_std": 0.24972325563430786, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 63 }, { "clip_ratio/high_max": 0.0017877031059470028, "clip_ratio/high_mean": 0.0005220111013386486, "clip_ratio/low_mean": 0.00028729273242333875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000809303836831532, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 646.6663208007812, "completions/mean_terminated_length": 563.8822631835938, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 0.5972586759988335, "grad_norm": 0.15234375, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 36900384.0, "reward": 0.5424107313156128, "reward_std": 0.2383444458246231, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 64 }, { "clip_ratio/high_max": 0.0015150777999224374, "clip_ratio/high_mean": 0.00041612504696786345, "clip_ratio/low_mean": 0.00030822398287000397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007243490281325649, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 616.5736694335938, "completions/mean_terminated_length": 581.2694091796875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.6065908428113153, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 37513650.0, "reward": 0.5133928656578064, "reward_std": 0.22654101252555847, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 65 }, { "clip_ratio/high_max": 0.001474865075579146, "clip_ratio/high_mean": 0.0003922959596138753, "clip_ratio/low_mean": 0.00031516578314949584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007074617333273636, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 567.1607666015625, "completions/mean_terminated_length": 515.2072143554688, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.615923009623797, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 38067114.0, "reward": 0.5580357313156128, "reward_std": 0.20436903834342957, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 66 }, { "clip_ratio/high_max": 0.0016830643362482078, "clip_ratio/high_mean": 0.00047929826359904837, "clip_ratio/low_mean": 0.0003632792582948241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008425775249634171, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2716.0, "completions/mean_length": 655.7623291015625, "completions/mean_terminated_length": 609.062255859375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.6252551764362788, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 38701197.0, "reward": 0.478794664144516, "reward_std": 0.25655919313430786, "rewards/verify_math_reward/mean": 0.4787946343421936, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 67 }, { "clip_ratio/high_max": 0.0013804803229504614, "clip_ratio/high_mean": 0.0003894913461408578, "clip_ratio/low_mean": 0.0002950324698076656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006845238103778684, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 567.6183471679688, "completions/mean_terminated_length": 543.8314819335938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6345873432487605, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 39268831.0, "reward": 0.5189732313156128, "reward_std": 0.20478273928165436, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 68 }, { "clip_ratio/high_max": 0.001562234175253252, "clip_ratio/high_mean": 0.0004804975578736048, "clip_ratio/low_mean": 0.00032069702626813523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008011945865291636, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 655.90625, "completions/mean_terminated_length": 581.3773803710938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.6439195100612424, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 39869067.0, "reward": 0.5334821939468384, "reward_std": 0.25821453332901, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 69 }, { "clip_ratio/high_max": 0.0017730849103827495, "clip_ratio/high_mean": 0.000578458475501975, "clip_ratio/low_mean": 0.00031695155996658286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008954100294431555, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 615.0145263671875, "completions/mean_terminated_length": 571.748046875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.6532516768737241, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 40465104.0, "reward": 0.5290178656578064, "reward_std": 0.2776102125644684, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 70 }, { "clip_ratio/high_max": 0.0014781976860831492, "clip_ratio/high_mean": 0.0004499135351352379, "clip_ratio/low_mean": 0.00035473589559842367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008046494249356329, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 590.0324096679688, "completions/mean_terminated_length": 554.4588012695312, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.6625838436862059, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 41056493.0, "reward": 0.5111607313156128, "reward_std": 0.2310115545988083, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 71 }, { "clip_ratio/high_max": 0.0014062314075999893, "clip_ratio/high_mean": 0.00042140097184528713, "clip_ratio/low_mean": 0.00031373776926102437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007351387566814083, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 617.328125, "completions/mean_terminated_length": 574.0903930664062, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.6719160104986877, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 41662003.0, "reward": 0.4832589626312256, "reward_std": 0.2109428197145462, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 72 }, { "clip_ratio/high_max": 0.0015306857339965063, "clip_ratio/high_mean": 0.00042900978519355704, "clip_ratio/low_mean": 0.00036409031235962175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007931000982352998, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 622.6986694335938, "completions/mean_terminated_length": 551.4920654296875, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.6812481773111695, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 42242989.0, "reward": 0.5189732313156128, "reward_std": 0.22252734005451202, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 73 }, { "clip_ratio/high_max": 0.0016175444525288185, "clip_ratio/high_mean": 0.0004853530490436242, "clip_ratio/low_mean": 0.00029653505043825135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007818881131242961, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 656.9174194335938, "completions/mean_terminated_length": 582.4104614257812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.6905803441236512, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 42843467.0, "reward": 0.5133928656578064, "reward_std": 0.2449902892112732, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 74 }, { "clip_ratio/high_max": 0.0015286737470887601, "clip_ratio/high_mean": 0.00041930390784727933, "clip_ratio/low_mean": 0.00022943636531636002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006487402688435395, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 641.3449096679688, "completions/mean_terminated_length": 590.4835815429688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.6999125109361329, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 43449928.0, "reward": 0.512276828289032, "reward_std": 0.18021291494369507, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 75 }, { "clip_ratio/high_max": 0.0014769398867429118, "clip_ratio/high_mean": 0.0004729157137717266, "clip_ratio/low_mean": 0.0003285410001581113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008014567233658454, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3077.0, "completions/mean_length": 662.3928833007812, "completions/mean_terminated_length": 599.963623046875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.7092446777486148, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 44071136.0, "reward": 0.4810267984867096, "reward_std": 0.2617064416408539, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 76 }, { "clip_ratio/high_max": 0.0018359375881118467, "clip_ratio/high_mean": 0.0005651184619637206, "clip_ratio/low_mean": 0.0004314097777751158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009965282588382252, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 630.4140625, "completions/mean_terminated_length": 559.3656005859375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.7185768445610965, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 44653883.0, "reward": 0.5066964626312256, "reward_std": 0.2591606676578522, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 77 }, { "clip_ratio/high_max": 0.0014776905736653134, "clip_ratio/high_mean": 0.00043647162488014146, "clip_ratio/low_mean": 0.000357973758582375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007944453647041883, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 583.5513916015625, "completions/mean_terminated_length": 539.893798828125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.7279090113735783, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 45217601.0, "reward": 0.5647321939468384, "reward_std": 0.2161722332239151, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606892466545105, "step": 78 }, { "clip_ratio/high_max": 0.0017077371121558826, "clip_ratio/high_mean": 0.000597816728259204, "clip_ratio/low_mean": 0.00041782399648582214, "clip_ratio/low_min": 8.30675162433181e-06, "clip_ratio/region_mean": 0.0010156407151953317, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2142.0, "completions/mean_length": 576.6707763671875, "completions/mean_terminated_length": 532.9276733398438, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.73724117818606, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 45778330.0, "reward": 0.5491071939468384, "reward_std": 0.2764067053794861, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 79 }, { "clip_ratio/high_max": 0.0015504330685871537, "clip_ratio/high_mean": 0.00042385151959933864, "clip_ratio/low_mean": 0.0004107067993572855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008345583219124819, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 574.9442138671875, "completions/mean_terminated_length": 531.1796875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.7465733449985419, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 46338472.0, "reward": 0.5602678656578064, "reward_std": 0.27185171842575073, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 80 }, { "clip_ratio/high_max": 0.0018751709330899757, "clip_ratio/high_mean": 0.000534818087999156, "clip_ratio/low_mean": 0.00041298053474747576, "clip_ratio/low_min": 1.201923078042455e-05, "clip_ratio/region_mean": 0.0009477986250203685, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1882.0, "completions/mean_length": 644.7142944335938, "completions/mean_terminated_length": 593.902587890625, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.7559055118110236, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 46958704.0, "reward": 0.5111607313156128, "reward_std": 0.2724490761756897, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 81 }, { "clip_ratio/high_max": 0.001774657021087478, "clip_ratio/high_mean": 0.0005811095738863514, "clip_ratio/low_mean": 0.00030802302762822364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000889132607881038, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 642.3326416015625, "completions/mean_terminated_length": 563.481689453125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.7652376786235054, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 47546482.0, "reward": 0.543526828289032, "reward_std": 0.22124601900577545, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 82 }, { "clip_ratio/high_max": 0.0017228152819370735, "clip_ratio/high_mean": 0.0005372254679514299, "clip_ratio/low_mean": 0.0002621714993438218, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007993969729795936, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 585.703125, "completions/mean_terminated_length": 550.0856323242188, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.7745698454359872, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 48119624.0, "reward": 0.6116071939468384, "reward_std": 0.21981680393218994, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 83 }, { "clip_ratio/high_max": 0.0015164682645263383, "clip_ratio/high_mean": 0.0003703758887922959, "clip_ratio/low_mean": 0.00025774244113563327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006281183268583845, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 631.4330444335938, "completions/mean_terminated_length": 584.4027099609375, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.7839020122484689, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 48731852.0, "reward": 0.4508928656578064, "reward_std": 0.20778609812259674, "rewards/verify_math_reward/mean": 0.4508928656578064, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 84 }, { "clip_ratio/high_max": 0.001654619662076584, "clip_ratio/high_mean": 0.0005539177609534818, "clip_ratio/low_mean": 0.00027753965173360484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008314574142787023, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 609.1239013671875, "completions/mean_terminated_length": 549.7559814453125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.7932341790609507, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 49305915.0, "reward": 0.520089328289032, "reward_std": 0.22800637781620026, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 85 }, { "clip_ratio/high_max": 0.0014105494983596145, "clip_ratio/high_mean": 0.0004022495686513139, "clip_ratio/low_mean": 0.00031966669246230595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007219162589535699, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2128.0, "completions/mean_length": 560.828125, "completions/mean_terminated_length": 544.975341796875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.8025663458734325, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 49878897.0, "reward": 0.5703125, "reward_std": 0.2051176279783249, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 86 }, { "clip_ratio/high_max": 0.0018720411289905314, "clip_ratio/high_mean": 0.0005458629669874426, "clip_ratio/low_mean": 0.0002696333073117785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008154962742992211, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2253.0, "completions/mean_length": 522.9933471679688, "completions/mean_terminated_length": 494.8594055175781, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.8118985126859143, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0133, "num_tokens": 50399491.0, "reward": 0.5848214626312256, "reward_std": 0.21879789233207703, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 87 }, { "clip_ratio/high_max": 0.0016477267345180735, "clip_ratio/high_mean": 0.0004959221550961956, "clip_ratio/low_mean": 0.00031129847548072576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008072206387623737, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 572.3092041015625, "completions/mean_terminated_length": 512.314453125, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.821230679498396, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 50930912.0, "reward": 0.6071428656578064, "reward_std": 0.20921824872493744, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 88 }, { "clip_ratio/high_max": 0.0017612755818845471, "clip_ratio/high_mean": 0.00045294952428776014, "clip_ratio/low_mean": 0.0003718053224019968, "clip_ratio/low_min": 2.210980892414227e-05, "clip_ratio/region_mean": 0.0008247548489634937, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 598.7467041015625, "completions/mean_terminated_length": 555.2779541015625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.8305628463108778, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 51524181.0, "reward": 0.5078125, "reward_std": 0.21928435564041138, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 89 }, { "clip_ratio/high_max": 0.0015908889745333, "clip_ratio/high_mean": 0.0004882191981323558, "clip_ratio/low_mean": 0.00028090034481920156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007691195569350384, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 603.0725708007812, "completions/mean_terminated_length": 559.6576538085938, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.8398950131233596, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 52107030.0, "reward": 0.5178571939468384, "reward_std": 0.22804872691631317, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 90 }, { "clip_ratio/high_max": 0.0013870225702703465, "clip_ratio/high_mean": 0.00045190190371613426, "clip_ratio/low_mean": 0.00035744704405260563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008093489577731816, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 609.4330444335938, "completions/mean_terminated_length": 550.0703735351562, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.8492271799358414, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 52679602.0, "reward": 0.5647321939468384, "reward_std": 0.2426195591688156, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 91 }, { "clip_ratio/high_max": 0.001600240262632724, "clip_ratio/high_mean": 0.0005148330510564847, "clip_ratio/low_mean": 0.0003026256623570589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008174586901077419, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 613.90625, "completions/mean_terminated_length": 558.6349487304688, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.8585593467483231, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 53259398.0, "reward": 0.5848214626312256, "reward_std": 0.21797305345535278, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 92 }, { "clip_ratio/high_max": 0.0018657794225873658, "clip_ratio/high_mean": 0.0006483363099505368, "clip_ratio/low_mean": 0.00034770081265378394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009960370989574585, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 586.0256958007812, "completions/mean_terminated_length": 542.3988647460938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.8678915135608049, "grad_norm": 0.1533203125, "learning_rate": 1e-06, "loss": 0.0216, "num_tokens": 53836957.0, "reward": 0.5647321939468384, "reward_std": 0.2878982126712799, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 93 }, { "clip_ratio/high_max": 0.0016519532255188096, "clip_ratio/high_mean": 0.0005057442658653599, "clip_ratio/low_mean": 0.0003273701208854618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008331143963005161, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 611.1116333007812, "completions/mean_terminated_length": 563.8054809570312, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.8772236803732867, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0194, "num_tokens": 54412113.0, "reward": 0.5424107313156128, "reward_std": 0.23409229516983032, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 94 }, { "clip_ratio/high_max": 0.0016927642309383373, "clip_ratio/high_mean": 0.000469393598791612, "clip_ratio/low_mean": 0.00029131326095921395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007607068546349183, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 648.1741333007812, "completions/mean_terminated_length": 589.4710693359375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.8865558471857684, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 55018933.0, "reward": 0.5089285969734192, "reward_std": 0.20644059777259827, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 95 }, { "clip_ratio/high_max": 0.0017833485844676034, "clip_ratio/high_mean": 0.0006488943522526824, "clip_ratio/low_mean": 0.00030729869297374535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009561930583004141, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 627.1004638671875, "completions/mean_terminated_length": 560.0113525390625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.8958880139982502, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 55594639.0, "reward": 0.5546875, "reward_std": 0.26103493571281433, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 96 }, { "clip_ratio/high_max": 0.0013815107549817185, "clip_ratio/high_mean": 0.00044456125920078193, "clip_ratio/low_mean": 0.0003650004285873365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000809561681762716, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 610.333740234375, "completions/mean_terminated_length": 574.9661254882812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.905220180810732, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 56195930.0, "reward": 0.5290178656578064, "reward_std": 0.22154095768928528, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 97 }, { "clip_ratio/high_max": 0.0015758289937366499, "clip_ratio/high_mean": 0.00048606450809529633, "clip_ratio/low_mean": 0.00035013958085983177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008362040998690645, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 678.099365234375, "completions/mean_terminated_length": 615.9556884765625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.9145523476232138, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 56823899.0, "reward": 0.4698660969734192, "reward_std": 0.225752592086792, "rewards/verify_math_reward/mean": 0.4698660671710968, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 98 }, { "clip_ratio/high_max": 0.0015351482543337625, "clip_ratio/high_mean": 0.0004317303132665984, "clip_ratio/low_mean": 0.0002192336081634494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000650963912903535, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3610.0, "completions/mean_length": 640.6105346679688, "completions/mean_terminated_length": 577.7852172851562, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.9238845144356955, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0173, "num_tokens": 57431566.0, "reward": 0.4843750298023224, "reward_std": 0.2132733017206192, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 99 }, { "clip_ratio/high_max": 0.001455231406907842, "clip_ratio/high_mean": 0.0004697137283073971, "clip_ratio/low_mean": 0.00026407662903693563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007337903650750377, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 644.2890625, "completions/mean_terminated_length": 569.508544921875, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.9332166812481774, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 58021289.0, "reward": 0.53125, "reward_std": 0.2083178460597992, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 100 }, { "clip_ratio/high_max": 0.001401610332322889, "clip_ratio/high_mean": 0.0004110899087663711, "clip_ratio/low_mean": 0.0003003903148055542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000711480231530004, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 611.7701416015625, "completions/mean_terminated_length": 560.473388671875, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.9425488480606591, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 58601707.0, "reward": 0.527901828289032, "reward_std": 0.21383923292160034, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 101 }, { "clip_ratio/high_max": 0.0014903666742611676, "clip_ratio/high_mean": 0.0004585197335700286, "clip_ratio/low_mean": 0.00033919756333489204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007977173081599176, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3685.0, "completions/mean_length": 635.4330444335938, "completions/mean_terminated_length": 564.4874877929688, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.9518810148731408, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 59197239.0, "reward": 0.504464328289032, "reward_std": 0.23217298090457916, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 102 }, { "clip_ratio/high_max": 0.0013767386080871802, "clip_ratio/high_mean": 0.00037214821895759087, "clip_ratio/low_mean": 0.0003223466443387224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006944948577256582, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3788.0, "completions/mean_length": 686.8438110351562, "completions/mean_terminated_length": 624.8590698242188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9612131816856226, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 59834579.0, "reward": 0.494419664144516, "reward_std": 0.22564129531383514, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 103 }, { "clip_ratio/high_max": 0.0015710645147919422, "clip_ratio/high_mean": 0.00045990343096491415, "clip_ratio/low_mean": 0.00040235662572740694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000862260055328079, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2159.0, "completions/mean_length": 611.8058471679688, "completions/mean_terminated_length": 544.4208984375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.9705453484981044, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 60406069.0, "reward": 0.5446428656578064, "reward_std": 0.23514607548713684, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 104 }, { "clip_ratio/high_max": 0.0015253851961460896, "clip_ratio/high_mean": 0.0004695453960721352, "clip_ratio/low_mean": 0.0003123652346630479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007819106272108911, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 578.4296875, "completions/mean_terminated_length": 518.5391845703125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.9798775153105862, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 60950798.0, "reward": 0.5100446939468384, "reward_std": 0.23822499811649323, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 105 }, { "clip_ratio/high_max": 0.0015600823371642036, "clip_ratio/high_mean": 0.00041957787686897063, "clip_ratio/low_mean": 0.00031790625700978126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000737484128876531, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3423.0, "completions/mean_length": 594.1082763671875, "completions/mean_terminated_length": 550.5819091796875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.9892096821230679, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 61520255.0, "reward": 0.5145089626312256, "reward_std": 0.2048247903585434, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 106 }, { "clip_ratio/high_max": 0.0016536110906599788, "clip_ratio/high_mean": 0.00047350559771075496, "clip_ratio/low_mean": 0.0002858078601093439, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007593134505441412, "completions/clipped_ratio": 0.017045454545454586, "completions/max_length": 4096.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 574.446044921875, "completions/mean_terminated_length": 513.3786010742188, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 0.9985418489355498, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 62094613.0, "reward": 0.5245535969734192, "reward_std": 0.24536897242069244, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 107 }, { "clip_ratio/high_max": 0.001530498701868055, "clip_ratio/high_mean": 0.00045928891904623015, "clip_ratio/low_mean": 0.00035026266925797245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008095516059256624, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 585.390625, "completions/mean_terminated_length": 553.7635498046875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 1.0093321668124817, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 62676779.0, "reward": 0.5345982313156128, "reward_std": 0.2416074424982071, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 108 }, { "clip_ratio/high_max": 0.0019083833612967283, "clip_ratio/high_mean": 0.0006770117274754739, "clip_ratio/low_mean": 0.00033359393023602024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001010605651572405, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 589.703125, "completions/mean_terminated_length": 546.1220703125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 1.0186643336249634, "grad_norm": 0.154296875, "learning_rate": 1e-06, "loss": 0.0188, "num_tokens": 63256545.0, "reward": 0.5558035969734192, "reward_std": 0.2604754865169525, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 109 }, { "clip_ratio/high_max": 0.0015690998388890876, "clip_ratio/high_mean": 0.0004443408483894018, "clip_ratio/low_mean": 0.0003363973024761435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000780738146204385, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 654.1785888671875, "completions/mean_terminated_length": 579.6123046875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.0279965004374454, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 63857865.0, "reward": 0.4988839626312256, "reward_std": 0.2183186411857605, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 110 }, { "clip_ratio/high_max": 0.0015895372371232952, "clip_ratio/high_mean": 0.0004398960209073266, "clip_ratio/low_mean": 0.0003045257033136295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000744421729223177, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 637.169677734375, "completions/mean_terminated_length": 554.15771484375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 1.0373286672499271, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 64447129.0, "reward": 0.455357164144516, "reward_std": 0.23278027772903442, "rewards/verify_math_reward/mean": 0.4553571343421936, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 111 }, { "clip_ratio/high_max": 0.001427862858690787, "clip_ratio/high_mean": 0.0004775260467795306, "clip_ratio/low_mean": 0.0003213918915889735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007989179357537068, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 585.0949096679688, "completions/mean_terminated_length": 533.4053955078125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 1.0466608340624088, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 65012702.0, "reward": 0.5558035969734192, "reward_std": 0.24092638492584229, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 112 }, { "clip_ratio/high_max": 0.00150731401390658, "clip_ratio/high_mean": 0.0004276625006696122, "clip_ratio/low_mean": 0.0003573052471210758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007849677404010436, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 602.1049194335938, "completions/mean_terminated_length": 558.677978515625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.0559930008748906, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 65597764.0, "reward": 0.5267857313156128, "reward_std": 0.22913458943367004, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 113 }, { "clip_ratio/high_max": 0.0013777668355032802, "clip_ratio/high_mean": 0.00040216286163285986, "clip_ratio/low_mean": 0.00028282253288125503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006849853864423494, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 622.2020263671875, "completions/mean_terminated_length": 550.9852294921875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 1.0653251676873725, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 66192977.0, "reward": 0.4720982313156128, "reward_std": 0.20162531733512878, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949967861175537, "step": 114 }, { "clip_ratio/high_max": 0.0019017514350707643, "clip_ratio/high_mean": 0.0005487539110617945, "clip_ratio/low_mean": 0.0002849090384415831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008336629598488798, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 602.4140625, "completions/mean_terminated_length": 550.9796142578125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 1.0746573344998542, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 66767404.0, "reward": 0.5256696939468384, "reward_std": 0.18926939368247986, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 115 }, { "clip_ratio/high_max": 0.0016422621683886973, "clip_ratio/high_mean": 0.0005316739079717081, "clip_ratio/low_mean": 0.0003052589680692108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008369328761546058, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 596.8504638671875, "completions/mean_terminated_length": 553.3582153320312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.083989501312336, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 67344966.0, "reward": 0.5814732313156128, "reward_std": 0.2368360310792923, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 116 }, { "clip_ratio/high_max": 0.0015750610418763245, "clip_ratio/high_mean": 0.00046872477969372994, "clip_ratio/low_mean": 0.00023229588077811059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000701020672750019, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 581.203125, "completions/mean_terminated_length": 537.5164184570312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 1.0933216681248177, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 67905788.0, "reward": 0.520089328289032, "reward_std": 0.23138132691383362, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 117 }, { "clip_ratio/high_max": 0.0017898867627081927, "clip_ratio/high_mean": 0.0006009550015733112, "clip_ratio/low_mean": 0.0003076823650189908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000908637357497355, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 606.8638916015625, "completions/mean_terminated_length": 551.480712890625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 1.1026538349372994, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 68466546.0, "reward": 0.5602678656578064, "reward_std": 0.2578386664390564, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 118 }, { "clip_ratio/high_max": 0.0014992362812336069, "clip_ratio/high_mean": 0.00047664243606959644, "clip_ratio/low_mean": 0.0003313528115995723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008079952503976529, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 570.1998291015625, "completions/mean_terminated_length": 514.2346801757812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 1.1119860017497813, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 68993725.0, "reward": 0.590401828289032, "reward_std": 0.23255206644535065, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 119 }, { "clip_ratio/high_max": 0.0017286572456214344, "clip_ratio/high_mean": 0.0005849628532814677, "clip_ratio/low_mean": 0.00034277511713298736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009277379631384974, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 610.125, "completions/mean_terminated_length": 570.7810668945312, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 1.121318168562263, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 69596077.0, "reward": 0.546875, "reward_std": 0.2594548761844635, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 120 }, { "clip_ratio/high_max": 0.0015606451106577879, "clip_ratio/high_mean": 0.0004644531281883246, "clip_ratio/low_mean": 0.0003220886408143997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000786541769230098, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 618.7846069335938, "completions/mean_terminated_length": 563.5906982421875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.1306503353747448, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 70190364.0, "reward": 0.5066964626312256, "reward_std": 0.20136386156082153, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 121 }, { "clip_ratio/high_max": 0.0019570581998777925, "clip_ratio/high_mean": 0.0006037022139935289, "clip_ratio/low_mean": 0.00031340639498012024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009171086157948594, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 610.0, "completions/mean_terminated_length": 546.6181640625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 1.1399825021872265, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 70749956.0, "reward": 0.6071428656578064, "reward_std": 0.25292134284973145, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 122 }, { "clip_ratio/high_max": 0.001493587864388246, "clip_ratio/high_mean": 0.0004782370549492043, "clip_ratio/low_mean": 0.0003309355058718211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008091725567282992, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 619.6027221679688, "completions/mean_terminated_length": 580.36572265625, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 1.1493146689997085, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 71354248.0, "reward": 0.5569196939468384, "reward_std": 0.2237972617149353, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 123 }, { "clip_ratio/high_max": 0.0016810951947263675, "clip_ratio/high_mean": 0.0005409228716644066, "clip_ratio/low_mean": 0.00027879514937012573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008197180222850875, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 607.2957763671875, "completions/mean_terminated_length": 555.9331665039062, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 1.1586468358121902, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": 0.0064, "num_tokens": 71934649.0, "reward": 0.5680803656578064, "reward_std": 0.22320246696472168, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 124 }, { "clip_ratio/high_max": 0.001345407143162447, "clip_ratio/high_mean": 0.0003656016708646348, "clip_ratio/low_mean": 0.00030714410536347714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006727457744091225, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 653.8605346679688, "completions/mean_terminated_length": 587.2889404296875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.167979002624672, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 72541220.0, "reward": 0.4687500298023224, "reward_std": 0.1849105805158615, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 125 }, { "clip_ratio/high_max": 0.0015350585535998107, "clip_ratio/high_mean": 0.00047525672493975435, "clip_ratio/low_mean": 0.0002555170239020299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007307737505470868, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1873.0, "completions/mean_length": 559.700927734375, "completions/mean_terminated_length": 543.8430786132812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.1773111694371536, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 73114808.0, "reward": 0.5837053656578064, "reward_std": 0.2014077603816986, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 126 }, { "clip_ratio/high_max": 0.0018995921382156666, "clip_ratio/high_mean": 0.0005328663756927199, "clip_ratio/low_mean": 0.000282887822208977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008157542024491704, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3492.0, "completions/mean_length": 575.5703125, "completions/mean_terminated_length": 543.854736328125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 1.1866433362496354, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0165, "num_tokens": 73680351.0, "reward": 0.5948660969734192, "reward_std": 0.20970863103866577, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 127 }, { "clip_ratio/high_max": 0.0015654981434636284, "clip_ratio/high_mean": 0.00047197203639370855, "clip_ratio/low_mean": 0.00032296653432695166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007949385735628312, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3145.0, "completions/mean_length": 630.0390625, "completions/mean_terminated_length": 563.0068359375, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 1.1959755030621173, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 74267154.0, "reward": 0.5290178656578064, "reward_std": 0.24348537623882294, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 128 }, { "clip_ratio/high_max": 0.0017097643831220921, "clip_ratio/high_mean": 0.000537017465376266, "clip_ratio/low_mean": 0.00046191234196157893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009989298032451188, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 629.2578125, "completions/mean_terminated_length": 558.1856689453125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 1.205307669874599, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 74850329.0, "reward": 0.520089328289032, "reward_std": 0.2848559021949768, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 129 }, { "clip_ratio/high_max": 0.0014128207740213838, "clip_ratio/high_mean": 0.00044809176881699386, "clip_ratio/low_mean": 0.0004100578944417066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008581496549595613, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 674.7042846679688, "completions/mean_terminated_length": 592.5931396484375, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 1.2146398366870808, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 75461992.0, "reward": 0.463169664144516, "reward_std": 0.22529610991477966, "rewards/verify_math_reward/mean": 0.4631696343421936, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 130 }, { "clip_ratio/high_max": 0.00170088264712831, "clip_ratio/high_mean": 0.0005455831060316996, "clip_ratio/low_mean": 0.00022974602575231984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007753291392873507, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 547.3928833007812, "completions/mean_terminated_length": 523.4696655273438, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 1.2239720034995625, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 76013032.0, "reward": 0.582589328289032, "reward_std": 0.2192518711090088, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 131 }, { "clip_ratio/high_max": 0.0014436859664783697, "clip_ratio/high_mean": 0.0004139404118177481, "clip_ratio/low_mean": 0.0003627363410032558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007766767539578723, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 575.0123291015625, "completions/mean_terminated_length": 531.2485961914062, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 1.2333041703120444, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 76568683.0, "reward": 0.5345982313156128, "reward_std": 0.21838393807411194, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 132 }, { "clip_ratio/high_max": 0.0016194839154195506, "clip_ratio/high_mean": 0.0004929634028485452, "clip_ratio/low_mean": 0.0002613802015503097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007543436026935524, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 588.1283569335938, "completions/mean_terminated_length": 536.4835815429688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.2426363371245261, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 77133270.0, "reward": 0.5212053656578064, "reward_std": 0.20613820850849152, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 133 }, { "clip_ratio/high_max": 0.0018116976552846609, "clip_ratio/high_mean": 0.0005805711366519972, "clip_ratio/low_mean": 0.00030581744613300543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008863885868777288, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 585.453125, "completions/mean_terminated_length": 537.7986450195312, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 1.2519685039370079, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 77689948.0, "reward": 0.5792410969734192, "reward_std": 0.23405978083610535, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 134 }, { "clip_ratio/high_max": 0.0019365234938959475, "clip_ratio/high_mean": 0.0006199660264201157, "clip_ratio/low_mean": 0.00035826957866902376, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009782355946299504, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 568.747802734375, "completions/mean_terminated_length": 500.5301208496094, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 1.2613006707494896, "grad_norm": 0.162109375, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 78213930.0, "reward": 0.6049107313156128, "reward_std": 0.2616020143032074, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 135 }, { "clip_ratio/high_max": 0.0016102466006486793, "clip_ratio/high_mean": 0.00047373598295052943, "clip_ratio/low_mean": 0.00022196987777078903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006957058722036891, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 569.9006958007812, "completions/mean_terminated_length": 526.073486328125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.2706328375619713, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 78771105.0, "reward": 0.546875, "reward_std": 0.1892266422510147, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 136 }, { "clip_ratio/high_max": 0.0017010311821650248, "clip_ratio/high_mean": 0.0004654307119835721, "clip_ratio/low_mean": 0.00017142242313639144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006368531379621345, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 637.5770263671875, "completions/mean_terminated_length": 562.6510620117188, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 1.2799650043744533, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 79350774.0, "reward": 0.559151828289032, "reward_std": 0.19730672240257263, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 137 }, { "clip_ratio/high_max": 0.001485254843828443, "clip_ratio/high_mean": 0.0004001565591806866, "clip_ratio/low_mean": 0.00037924269122413534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007793992515416903, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 625.6027221679688, "completions/mean_terminated_length": 562.5045166015625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.289297171186935, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 79938090.0, "reward": 0.5401785969734192, "reward_std": 0.21222344040870667, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 138 }, { "clip_ratio/high_max": 0.001471151117584668, "clip_ratio/high_mean": 0.0004658673462927254, "clip_ratio/low_mean": 0.0003632058278526529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008290731775559834, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 619.0535888671875, "completions/mean_terminated_length": 571.855224609375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.2986293379994167, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 80539394.0, "reward": 0.4676339626312256, "reward_std": 0.24558056890964508, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 139 }, { "clip_ratio/high_max": 0.0013791911542284652, "clip_ratio/high_mean": 0.00041825788878213643, "clip_ratio/low_mean": 0.00029218789666174416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007104457718014601, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 654.0625, "completions/mean_terminated_length": 579.4937133789062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 1.3079615048118984, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 81140666.0, "reward": 0.4754464626312256, "reward_std": 0.21421971917152405, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 140 }, { "clip_ratio/high_max": 0.0015901103088253876, "clip_ratio/high_mean": 0.0004360975998451977, "clip_ratio/low_mean": 0.00036504875345144683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008011463532966445, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 657.25, "completions/mean_terminated_length": 586.751708984375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 1.3172936716243804, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 81749762.0, "reward": 0.4654017984867096, "reward_std": 0.24352674186229706, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 141 }, { "clip_ratio/high_max": 0.0012775105051332503, "clip_ratio/high_mean": 0.00041183771418218384, "clip_ratio/low_mean": 0.0003315282663152175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007433659852722485, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 606.578125, "completions/mean_terminated_length": 571.1724853515625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.326625838436862, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 82349800.0, "reward": 0.504464328289032, "reward_std": 0.21294091641902924, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 142 }, { "clip_ratio/high_max": 0.001707187309875735, "clip_ratio/high_mean": 0.0005364659032238706, "clip_ratio/low_mean": 0.0002514300375651146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007878959459048929, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3176.0, "completions/mean_length": 652.2745971679688, "completions/mean_terminated_length": 569.6251220703125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 1.3359580052493438, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 82939790.0, "reward": 0.486607164144516, "reward_std": 0.22218145430088043, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 143 }, { "clip_ratio/high_max": 0.0014778714521526126, "clip_ratio/high_mean": 0.0004181734707344731, "clip_ratio/low_mean": 0.0003226648072995886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007408382789435564, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 656.372802734375, "completions/mean_terminated_length": 601.7755126953125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 1.3452901720618256, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 83566772.0, "reward": 0.4609375298023224, "reward_std": 0.2352951020002365, "rewards/verify_math_reward/mean": 0.4609375, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 144 }, { "clip_ratio/high_max": 0.002060226055618841, "clip_ratio/high_mean": 0.0005598815916982858, "clip_ratio/low_mean": 0.0002592910132079851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008191726058157656, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3251.0, "completions/mean_length": 616.4832763671875, "completions/mean_terminated_length": 537.042236328125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 1.3546223388743073, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 84123877.0, "reward": 0.5647321939468384, "reward_std": 0.22852841019630432, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 145 }, { "clip_ratio/high_max": 0.0016651964615448378, "clip_ratio/high_mean": 0.0005243159653218754, "clip_ratio/low_mean": 0.00037592443391076813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009002403958220384, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 631.380615234375, "completions/mean_terminated_length": 572.3916015625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 1.3639545056867892, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 84731538.0, "reward": 0.527901828289032, "reward_std": 0.23349706828594208, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 146 }, { "clip_ratio/high_max": 0.001918751197081292, "clip_ratio/high_mean": 0.0005806725478123553, "clip_ratio/low_mean": 0.00036979717219765007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009504697172815213, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 659.8660888671875, "completions/mean_terminated_length": 589.4214477539062, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 1.373286672499271, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 85336658.0, "reward": 0.5089285969734192, "reward_std": 0.24296332895755768, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 147 }, { "clip_ratio/high_max": 0.0018146254715247778, "clip_ratio/high_mean": 0.0006021163496825466, "clip_ratio/low_mean": 0.0003653205460523168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000967436893915874, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 639.5614013671875, "completions/mean_terminated_length": 580.7117309570312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.3826188393117527, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 85931713.0, "reward": 0.5412946939468384, "reward_std": 0.24123017489910126, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 148 }, { "clip_ratio/high_max": 0.001473166490541189, "clip_ratio/high_mean": 0.0004518850794283935, "clip_ratio/low_mean": 0.000227511869979935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006793969478167128, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 597.7131958007812, "completions/mean_terminated_length": 534.10791015625, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 1.3919510061242344, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 86486352.0, "reward": 0.5613839626312256, "reward_std": 0.18708907067775726, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 149 }, { "clip_ratio/high_max": 0.0018019757117144763, "clip_ratio/high_mean": 0.0005032439592014271, "clip_ratio/low_mean": 0.0003022702036332703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008055141565819213, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 600.8449096679688, "completions/mean_terminated_length": 545.3662109375, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 1.4012831729367163, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 87063621.0, "reward": 0.578125, "reward_std": 0.19276241958141327, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 150 }, { "clip_ratio/high_max": 0.001472546028708166, "clip_ratio/high_mean": 0.00042694759076766786, "clip_ratio/low_mean": 0.00031916322961933474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007461108234565472, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 638.6652221679688, "completions/mean_terminated_length": 563.7628173828125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 1.410615339749198, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 87655241.0, "reward": 0.504464328289032, "reward_std": 0.21534626185894012, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 151 }, { "clip_ratio/high_max": 0.0015102116994967218, "clip_ratio/high_mean": 0.00044700323132929043, "clip_ratio/low_mean": 0.0003898030786331219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008368063117814017, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 560.7545166015625, "completions/mean_terminated_length": 536.9213256835938, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 1.4199475065616798, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0186, "num_tokens": 88217581.0, "reward": 0.59375, "reward_std": 0.23653365671634674, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 152 }, { "clip_ratio/high_max": 0.001383383129905269, "clip_ratio/high_mean": 0.000401061109641887, "clip_ratio/low_mean": 0.00030080922033448587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007018703408903093, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 592.9029541015625, "completions/mean_terminated_length": 533.2588500976562, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 1.4292796733741615, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 88776606.0, "reward": 0.5301339626312256, "reward_std": 0.19730813801288605, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 153 }, { "clip_ratio/high_max": 0.0017530759469082113, "clip_ratio/high_mean": 0.0005233436927483126, "clip_ratio/low_mean": 0.0002847095665856614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008080532716121525, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1682.0, "completions/mean_length": 575.3136596679688, "completions/mean_terminated_length": 539.5907592773438, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 1.4386118401866432, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 89337751.0, "reward": 0.5602678656578064, "reward_std": 0.2180151343345642, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 154 }, { "clip_ratio/high_max": 0.0014256071099225665, "clip_ratio/high_mean": 0.0005033377417476004, "clip_ratio/low_mean": 0.00025389279960563726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007572305385110667, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 578.6049194335938, "completions/mean_terminated_length": 514.6522827148438, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 1.4479440069991252, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 89874973.0, "reward": 0.5915178656578064, "reward_std": 0.2219550609588623, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 155 }, { "clip_ratio/high_max": 0.0016124090470839292, "clip_ratio/high_mean": 0.00046968579681561096, "clip_ratio/low_mean": 0.0003855286004181835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008552144081477309, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 560.0335083007812, "completions/mean_terminated_length": 528.1779174804688, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 1.457276173811607, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 90433299.0, "reward": 0.5602678656578064, "reward_std": 0.22064054012298584, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 156 }, { "clip_ratio/high_max": 0.001310938184360566, "clip_ratio/high_mean": 0.0003792753468587762, "clip_ratio/low_mean": 0.00029047973248452763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006697550852550194, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 630.8013916015625, "completions/mean_terminated_length": 595.6414794921875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 1.4666083406240886, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 91053137.0, "reward": 0.5145089626312256, "reward_std": 0.2079797238111496, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 157 }, { "clip_ratio/high_max": 0.001303116920098546, "clip_ratio/high_mean": 0.00037509429557758267, "clip_ratio/low_mean": 0.00026581576435091847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006409100451492122, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3264.0, "completions/mean_length": 611.3939819335938, "completions/mean_terminated_length": 572.0643310546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.4759405074365703, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 91650842.0, "reward": 0.543526828289032, "reward_std": 0.20298220217227936, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 158 }, { "clip_ratio/high_max": 0.0016756560808062204, "clip_ratio/high_mean": 0.0005686681888619205, "clip_ratio/low_mean": 0.0002220531608827514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007907213466751273, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 665.8783569335938, "completions/mean_terminated_length": 587.5650634765625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 1.4852726742490523, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 92244093.0, "reward": 0.5424107313156128, "reward_std": 0.2174845188856125, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 159 }, { "clip_ratio/high_max": 0.0015577739741274854, "clip_ratio/high_mean": 0.00045788256193191046, "clip_ratio/low_mean": 0.0003860801249402357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008439626881227014, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 618.5301513671875, "completions/mean_terminated_length": 567.3329467773438, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 1.494604841061534, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 92835152.0, "reward": 0.5725446939468384, "reward_std": 0.23759308457374573, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 160 }, { "clip_ratio/high_max": 0.0016572253407503013, "clip_ratio/high_mean": 0.0005490923235811351, "clip_ratio/low_mean": 0.0002949228953639249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008440152068942552, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 606.3694458007812, "completions/mean_terminated_length": 554.9931640625, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 1.5039370078740157, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 93407603.0, "reward": 0.5758928656578064, "reward_std": 0.23304016888141632, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 161 }, { "clip_ratio/high_max": 0.0015902642408036627, "clip_ratio/high_mean": 0.0005070419258572656, "clip_ratio/low_mean": 0.00037056753353681415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008776094673521584, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 660.7957763671875, "completions/mean_terminated_length": 570.2921142578125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.5132691746864975, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 94000908.0, "reward": 0.5055803656578064, "reward_std": 0.2254137545824051, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 162 }, { "clip_ratio/high_max": 0.0013241914139143773, "clip_ratio/high_mean": 0.0003986500561268258, "clip_ratio/low_mean": 0.0004089949568424345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008076450162661786, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 588.0491333007812, "completions/mean_terminated_length": 552.4554443359375, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 1.5226013414989792, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 94592552.0, "reward": 0.4832589626312256, "reward_std": 0.25959643721580505, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 163 }, { "clip_ratio/high_max": 0.0015286482203009655, "clip_ratio/high_mean": 0.0004840133163952487, "clip_ratio/low_mean": 0.0002756354216444379, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007596487298542343, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3384.0, "completions/mean_length": 590.3817138671875, "completions/mean_terminated_length": 530.6947021484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.531933508311461, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 95153582.0, "reward": 0.5491071939468384, "reward_std": 0.19486860930919647, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 164 }, { "clip_ratio/high_max": 0.001393048787576845, "clip_ratio/high_mean": 0.00045955346922710305, "clip_ratio/low_mean": 0.0002616896331346652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007212431041807577, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 636.2254638671875, "completions/mean_terminated_length": 589.2601928710938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.5412656751239429, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 95766376.0, "reward": 0.515625, "reward_std": 0.21876581013202667, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 165 }, { "clip_ratio/high_max": 0.0015330221358453855, "clip_ratio/high_mean": 0.0004225006006208787, "clip_ratio/low_mean": 0.0003513339688652195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007738345693724114, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3382.0, "completions/mean_length": 524.646240234375, "completions/mean_terminated_length": 508.6311950683594, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 1.5505978419364246, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 96313635.0, "reward": 0.5390625, "reward_std": 0.18652385473251343, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 166 }, { "clip_ratio/high_max": 0.0018427275372232543, "clip_ratio/high_mean": 0.0005930055681346857, "clip_ratio/low_mean": 0.0003942071366509481, "clip_ratio/low_min": 1.1531365089467727e-05, "clip_ratio/region_mean": 0.0009872127111520967, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 606.59375, "completions/mean_terminated_length": 551.2063598632812, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 1.5599300087489065, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 96882703.0, "reward": 0.5424107313156128, "reward_std": 0.27500778436660767, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 167 }, { "clip_ratio/high_max": 0.0017362831840728177, "clip_ratio/high_mean": 0.0005281108299186599, "clip_ratio/low_mean": 0.00038208843841402995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009101992700379924, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3256.0, "completions/mean_length": 630.1395263671875, "completions/mean_terminated_length": 587.06103515625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 1.5692621755613883, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 97486596.0, "reward": 0.527901828289032, "reward_std": 0.23364794254302979, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949967861175537, "step": 168 }, { "clip_ratio/high_max": 0.0015097710402187658, "clip_ratio/high_mean": 0.00043105074610139127, "clip_ratio/low_mean": 0.0002880731638015277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007191239051280718, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3691.0, "completions/mean_length": 589.4308471679688, "completions/mean_terminated_length": 553.8511352539062, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 1.57859434237387, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 98065662.0, "reward": 0.543526828289032, "reward_std": 0.2099350243806839, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 169 }, { "clip_ratio/high_max": 0.0012964018524144194, "clip_ratio/high_mean": 0.0004398460700940632, "clip_ratio/low_mean": 0.00031049041422193113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00075033648590761, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 584.7199096679688, "completions/mean_terminated_length": 561.04833984375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.5879265091863517, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 98654163.0, "reward": 0.559151828289032, "reward_std": 0.23101112246513367, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 170 }, { "clip_ratio/high_max": 0.0014608978817705065, "clip_ratio/high_mean": 0.0004272782414318499, "clip_ratio/low_mean": 0.00038870188473083545, "clip_ratio/low_min": 9.882985068543348e-06, "clip_ratio/region_mean": 0.0008159801154761226, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 577.6105346679688, "completions/mean_terminated_length": 557.866455078125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 1.5972586759988334, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 99242310.0, "reward": 0.5022321939468384, "reward_std": 0.23322536051273346, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 171 }, { "clip_ratio/high_max": 0.0016138159080583137, "clip_ratio/high_mean": 0.0005044096062647441, "clip_ratio/low_mean": 0.00039532564051114605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000899735243365285, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3262.0, "completions/mean_length": 613.7254638671875, "completions/mean_terminated_length": 586.3059692382812, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 1.6065908428113151, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 99855288.0, "reward": 0.5401785969734192, "reward_std": 0.27092063426971436, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 172 }, { "clip_ratio/high_max": 0.0017673315305728465, "clip_ratio/high_mean": 0.0005731661549361888, "clip_ratio/low_mean": 0.0003138328314662431, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000886998975147435, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3660.0, "completions/mean_length": 611.2288208007812, "completions/mean_terminated_length": 539.7870483398438, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 1.6159230096237969, "grad_norm": 0.1611328125, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 100409669.0, "reward": 0.5546875, "reward_std": 0.2342003732919693, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 173 }, { "clip_ratio/high_max": 0.0017514068495074753, "clip_ratio/high_mean": 0.0005628040185001737, "clip_ratio/low_mean": 0.00027705558295565424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008398595955441124, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4009.0, "completions/mean_length": 611.2767944335938, "completions/mean_terminated_length": 543.8816528320312, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 1.6252551764362788, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 100978053.0, "reward": 0.53125, "reward_std": 0.23773828148841858, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 174 }, { "clip_ratio/high_max": 0.0016308020103679155, "clip_ratio/high_mean": 0.00046576996840030915, "clip_ratio/low_mean": 0.0003534856388114349, "clip_ratio/low_min": 1.5067502317833714e-05, "clip_ratio/region_mean": 0.0008192556115318439, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 636.96875, "completions/mean_terminated_length": 574.0772705078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.6345873432487605, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 101591609.0, "reward": 0.5502232313156128, "reward_std": 0.20711390674114227, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 175 }, { "clip_ratio/high_max": 0.0016254229958576616, "clip_ratio/high_mean": 0.0004968349437604047, "clip_ratio/low_mean": 0.00037942842868687876, "clip_ratio/low_min": 1.242544749402441e-05, "clip_ratio/region_mean": 0.0008762633733567782, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 611.802490234375, "completions/mean_terminated_length": 584.3678588867188, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 1.6439195100612425, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 102201416.0, "reward": 0.5502232313156128, "reward_std": 0.24957171082496643, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 176 }, { "clip_ratio/high_max": 0.00159170483766502, "clip_ratio/high_mean": 0.0005176980662326969, "clip_ratio/low_mean": 0.0003070960899549391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008247941505032941, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 618.4230346679688, "completions/mean_terminated_length": 559.2134399414062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 1.6532516768737242, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 102784699.0, "reward": 0.5033482313156128, "reward_std": 0.22774632275104523, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 177 }, { "clip_ratio/high_max": 0.0018602106811158592, "clip_ratio/high_mean": 0.0005304169801547687, "clip_ratio/low_mean": 0.00028010148514567845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008105184706437285, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 583.4676513671875, "completions/mean_terminated_length": 547.8274536132812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 1.662583843686206, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 103354078.0, "reward": 0.5714285969734192, "reward_std": 0.22187836468219757, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 178 }, { "clip_ratio/high_max": 0.0012931452702105162, "clip_ratio/high_mean": 0.0003741790530966682, "clip_ratio/low_mean": 0.0003236346656194655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006978137143960339, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1951.0, "completions/mean_length": 566.6975708007812, "completions/mean_terminated_length": 534.9020385742188, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 1.6719160104986877, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 103911255.0, "reward": 0.5479910969734192, "reward_std": 0.22198784351348877, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 179 }, { "clip_ratio/high_max": 0.0016382071171392454, "clip_ratio/high_mean": 0.000511829075321657, "clip_ratio/low_mean": 0.00027342408543518104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007852531634853221, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3362.0, "completions/mean_length": 562.0145263671875, "completions/mean_terminated_length": 522.1275634765625, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 1.6812481773111694, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0069, "num_tokens": 104460764.0, "reward": 0.5758928656578064, "reward_std": 0.22436067461967468, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 180 }, { "clip_ratio/high_max": 0.0016517209041921888, "clip_ratio/high_mean": 0.00045418715899359086, "clip_ratio/low_mean": 0.00036191883907576994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008161060031852685, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 611.5926513671875, "completions/mean_terminated_length": 548.23974609375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.690580344123651, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 105026063.0, "reward": 0.5725446939468384, "reward_std": 0.21928434073925018, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 181 }, { "clip_ratio/high_max": 0.0015860793446336174, "clip_ratio/high_mean": 0.0004572858007918512, "clip_ratio/low_mean": 0.0003717205906923482, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000829006378808117, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 659.6339721679688, "completions/mean_terminated_length": 605.0884399414062, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 1.6999125109361328, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 105664255.0, "reward": 0.4687500298023224, "reward_std": 0.24250829219818115, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 182 }, { "clip_ratio/high_max": 0.0019195967724954244, "clip_ratio/high_mean": 0.0005889438366466493, "clip_ratio/low_mean": 0.000331454284719257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009203981280734297, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2211.0, "completions/mean_length": 527.6082763671875, "completions/mean_terminated_length": 499.5107116699219, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 1.7092446777486148, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 106204096.0, "reward": 0.5970982313156128, "reward_std": 0.23187805712223053, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 183 }, { "clip_ratio/high_max": 0.0017383342383254785, "clip_ratio/high_mean": 0.0005435834009404061, "clip_ratio/low_mean": 0.0003668535971428355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009104369983106153, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 607.7489013671875, "completions/mean_terminated_length": 576.3232421875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.7185768445610965, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 106808719.0, "reward": 0.535714328289032, "reward_std": 0.2203042209148407, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 184 }, { "clip_ratio/high_max": 0.0014295119999587769, "clip_ratio/high_mean": 0.0004413099918565422, "clip_ratio/low_mean": 0.00035032830066938914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000791638280134066, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 650.6908569335938, "completions/mean_terminated_length": 559.9209594726562, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 1.7279090113735784, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 107383314.0, "reward": 0.5145089626312256, "reward_std": 0.23653294146060944, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 185 }, { "clip_ratio/high_max": 0.0013785191176793887, "clip_ratio/high_mean": 0.000440667439875142, "clip_ratio/low_mean": 0.0002983641954870109, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007390316295641242, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 626.3192138671875, "completions/mean_terminated_length": 547.1027221679688, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.7372411781860602, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 107954424.0, "reward": 0.4877232313156128, "reward_std": 0.22071540355682373, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 186 }, { "clip_ratio/high_max": 0.002037179237959208, "clip_ratio/high_mean": 0.0006475231461990916, "clip_ratio/low_mean": 0.00035991261415802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010074357742269058, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1821.0, "completions/mean_length": 546.9765625, "completions/mean_terminated_length": 515.00341796875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 1.7465733449985419, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 108499491.0, "reward": 0.606026828289032, "reward_std": 0.2405993491411209, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 187 }, { "clip_ratio/high_max": 0.0016446320369141176, "clip_ratio/high_mean": 0.0004841918025704217, "clip_ratio/low_mean": 0.0003864412077518864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008706330017957953, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 650.833740234375, "completions/mean_terminated_length": 588.1942749023438, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.7559055118110236, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 109108118.0, "reward": 0.5334821939468384, "reward_std": 0.23860770463943481, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 188 }, { "clip_ratio/high_max": 0.0017032873129210202, "clip_ratio/high_mean": 0.0005462472404360597, "clip_ratio/low_mean": 0.0002708622223508428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008171094632416498, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 626.6964721679688, "completions/mean_terminated_length": 563.6181640625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.7652376786235053, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 109700790.0, "reward": 0.5301339626312256, "reward_std": 0.2315317690372467, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 189 }, { "clip_ratio/high_max": 0.0016155478788277833, "clip_ratio/high_mean": 0.0005073620002349344, "clip_ratio/low_mean": 0.0003483386274183431, "clip_ratio/low_min": 8.278145287476946e-06, "clip_ratio/region_mean": 0.000855700641295698, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3282.0, "completions/mean_length": 647.1138916015625, "completions/mean_terminated_length": 600.29638671875, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 1.774569845435987, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 110316236.0, "reward": 0.5446428656578064, "reward_std": 0.24089176952838898, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 190 }, { "clip_ratio/high_max": 0.0015572338061247137, "clip_ratio/high_mean": 0.0004307132467147312, "clip_ratio/low_mean": 0.0002452737038538544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006759869578445432, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3457.0, "completions/mean_length": 601.9989013671875, "completions/mean_terminated_length": 542.5097045898438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.7839020122484688, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 110884539.0, "reward": 0.5736607313156128, "reward_std": 0.19325098395347595, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 191 }, { "clip_ratio/high_max": 0.0017674143418844324, "clip_ratio/high_mean": 0.0005331737964979766, "clip_ratio/low_mean": 0.00028827472226566897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008214485351345502, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 593.4051513671875, "completions/mean_terminated_length": 545.858642578125, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 1.7932341790609507, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 111450726.0, "reward": 0.5379464626312256, "reward_std": 0.24709786474704742, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364279270172, "step": 192 }, { "clip_ratio/high_max": 0.0015050893280204036, "clip_ratio/high_mean": 0.0004983497168495887, "clip_ratio/low_mean": 0.00032437484799174854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008227245612033585, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3481.0, "completions/mean_length": 666.810302734375, "completions/mean_terminated_length": 604.4613647460938, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 1.8025663458734325, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 112075148.0, "reward": 0.520089328289032, "reward_std": 0.2022358477115631, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 193 }, { "clip_ratio/high_max": 0.001532794936792925, "clip_ratio/high_mean": 0.0004692633813192515, "clip_ratio/low_mean": 0.00026098032344634703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000730243698853883, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 581.3225708007812, "completions/mean_terminated_length": 533.6119995117188, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 1.8118985126859144, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 112632317.0, "reward": 0.546875, "reward_std": 0.18945668637752533, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 194 }, { "clip_ratio/high_max": 0.0016100813445518725, "clip_ratio/high_mean": 0.0005150434521965508, "clip_ratio/low_mean": 0.0002971570513636834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008122004992401344, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 605.6127319335938, "completions/mean_terminated_length": 546.18505859375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.8212306794983961, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 113197890.0, "reward": 0.53125, "reward_std": 0.2296549677848816, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 195 }, { "clip_ratio/high_max": 0.0014897636101522949, "clip_ratio/high_mean": 0.00043615922061235324, "clip_ratio/low_mean": 0.0003125943557051869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007487535808650136, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 611.0, "completions/mean_terminated_length": 539.5535278320312, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 1.8305628463108778, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 113754490.0, "reward": 0.5881696939468384, "reward_std": 0.19711239635944366, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 196 }, { "clip_ratio/high_max": 0.0015158454298216384, "clip_ratio/high_mean": 0.0005011510493204696, "clip_ratio/low_mean": 0.0004062542257088353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009074052768482943, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 692.9297485351562, "completions/mean_terminated_length": 615.2340087890625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 1.8398950131233596, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0194, "num_tokens": 114394243.0, "reward": 0.4174107313156128, "reward_std": 0.27331990003585815, "rewards/verify_math_reward/mean": 0.4174107015132904, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 197 }, { "clip_ratio/high_max": 0.0015211136451398488, "clip_ratio/high_mean": 0.0004293553620300372, "clip_ratio/low_mean": 0.0004139009126902238, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008432562708549085, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 614.5201416015625, "completions/mean_terminated_length": 555.2440795898438, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 1.8492271799358413, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 114978949.0, "reward": 0.4609375298023224, "reward_std": 0.25711795687675476, "rewards/verify_math_reward/mean": 0.4609375, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 198 }, { "clip_ratio/high_max": 0.0016054617317422526, "clip_ratio/high_mean": 0.00046373914187824994, "clip_ratio/low_mean": 0.0002626001449925752, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007263392790264334, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 605.904052734375, "completions/mean_terminated_length": 538.4049682617188, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 1.858559346748323, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 115532775.0, "reward": 0.5011160969734192, "reward_std": 0.2074911743402481, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 199 }, { "clip_ratio/high_max": 0.001363764398774947, "clip_ratio/high_mean": 0.0004251047785146511, "clip_ratio/low_mean": 0.00025553890270657575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006806436749684508, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 614.4765625, "completions/mean_terminated_length": 563.2196655273438, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 1.8678915135608047, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 116121490.0, "reward": 0.6071428656578064, "reward_std": 0.20835243165493011, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 200 }, { "clip_ratio/high_max": 0.001835477736676694, "clip_ratio/high_mean": 0.0006160475415981637, "clip_ratio/low_mean": 0.0004029053911835945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001018952926642669, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3816.0, "completions/mean_length": 595.8092041015625, "completions/mean_terminated_length": 544.2774658203125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.8772236803732867, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 116682703.0, "reward": 0.5446428656578064, "reward_std": 0.2618962228298187, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 201 }, { "clip_ratio/high_max": 0.0018763634798233397, "clip_ratio/high_mean": 0.0005957267251233134, "clip_ratio/low_mean": 0.00028847546036558924, "clip_ratio/low_min": 6.042149834684096e-06, "clip_ratio/region_mean": 0.0008842021939017286, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 673.896240234375, "completions/mean_terminated_length": 595.7659301757812, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 1.8865558471857684, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 117284674.0, "reward": 0.5491071939468384, "reward_std": 0.23686742782592773, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 202 }, { "clip_ratio/high_max": 0.0015708985192759428, "clip_ratio/high_mean": 0.0005060054093064537, "clip_ratio/low_mean": 0.0003360165705998952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008420219883191749, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 608.3080444335938, "completions/mean_terminated_length": 568.943603515625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.8958880139982504, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 117880550.0, "reward": 0.494419664144516, "reward_std": 0.2561882734298706, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 203 }, { "clip_ratio/high_max": 0.0015028491043267422, "clip_ratio/high_mean": 0.0005095660935694468, "clip_ratio/low_mean": 0.0003117276033890448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008212936972995522, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 625.9330444335938, "completions/mean_terminated_length": 578.8280639648438, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 1.905220180810732, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": 0.0157, "num_tokens": 118489186.0, "reward": 0.535714328289032, "reward_std": 0.26035529375076294, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 204 }, { "clip_ratio/high_max": 0.0013872589634047472, "clip_ratio/high_mean": 0.00036737400603215065, "clip_ratio/low_mean": 0.0002780808164288828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006454548242800229, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3114.0, "completions/mean_length": 630.53125, "completions/mean_terminated_length": 575.5238037109375, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 1.9145523476232138, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 119082614.0, "reward": 0.5089285969734192, "reward_std": 0.18239142000675201, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 205 }, { "clip_ratio/high_max": 0.0015918045519356383, "clip_ratio/high_mean": 0.0004928177258989308, "clip_ratio/low_mean": 0.0003009687043231679, "clip_ratio/low_min": 9.489826879871543e-06, "clip_ratio/region_mean": 0.0007937864347695722, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 656.5971069335938, "completions/mean_terminated_length": 590.0784912109375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.9238845144356955, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0077, "num_tokens": 119680917.0, "reward": 0.5078125, "reward_std": 0.22751741111278534, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 206 }, { "clip_ratio/high_max": 0.0014030275124241598, "clip_ratio/high_mean": 0.0004283967252831644, "clip_ratio/low_mean": 0.00028666629077633843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007150630153773818, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3158.0, "completions/mean_length": 604.9553833007812, "completions/mean_terminated_length": 553.5582885742188, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 1.9332166812481772, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 120263157.0, "reward": 0.5691964626312256, "reward_std": 0.2050851285457611, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 207 }, { "clip_ratio/high_max": 0.0016974749250948662, "clip_ratio/high_mean": 0.0004747115065129037, "clip_ratio/low_mean": 0.00037400690644062706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008487184168188833, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 631.3449096679688, "completions/mean_terminated_length": 596.1904907226562, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.942548848060659, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 120882970.0, "reward": 0.5055803656578064, "reward_std": 0.22537913918495178, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 208 }, { "clip_ratio/high_max": 0.001820983075958793, "clip_ratio/high_mean": 0.0006102061242927448, "clip_ratio/low_mean": 0.0003176068756829409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009278130028178566, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 559.8683471679688, "completions/mean_terminated_length": 499.6617736816406, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 1.9518810148731407, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 121408396.0, "reward": 0.5993303656578064, "reward_std": 0.2248920202255249, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 209 }, { "clip_ratio/high_max": 0.0019035836230614223, "clip_ratio/high_mean": 0.0006059127754269866, "clip_ratio/low_mean": 0.00039355266926577315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000999465443783265, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3370.0, "completions/mean_length": 599.7980346679688, "completions/mean_terminated_length": 536.2306518554688, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 1.9612131816856226, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 121962903.0, "reward": 0.5691964626312256, "reward_std": 0.24487264454364777, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 210 }, { "clip_ratio/high_max": 0.0016127133912959835, "clip_ratio/high_mean": 0.000553788932847965, "clip_ratio/low_mean": 0.0004200730081720394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009738619387462677, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 596.724365234375, "completions/mean_terminated_length": 561.2186889648438, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.9705453484981044, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0213, "num_tokens": 122550624.0, "reward": 0.5613839626312256, "reward_std": 0.250287801027298, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 211 }, { "clip_ratio/high_max": 0.0015774362473166548, "clip_ratio/high_mean": 0.000461443678887008, "clip_ratio/low_mean": 0.00034105664599337615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008025003280636156, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 582.279052734375, "completions/mean_terminated_length": 522.4540405273438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.9798775153105863, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 123095442.0, "reward": 0.5837053656578064, "reward_std": 0.20339404046535492, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 212 }, { "clip_ratio/high_max": 0.0013675859308932559, "clip_ratio/high_mean": 0.00044480577344074845, "clip_ratio/low_mean": 0.000273161410063949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007179671843005053, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2225.0, "completions/mean_length": 564.7801513671875, "completions/mean_terminated_length": 516.8450317382812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 1.989209682123068, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 123645085.0, "reward": 0.5859375, "reward_std": 0.20087602734565735, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 213 }, { "clip_ratio/high_max": 0.0015998765984477359, "clip_ratio/high_mean": 0.0005085137538571871, "clip_ratio/low_mean": 0.000265932229694954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007744459771856782, "completions/clipped_ratio": 0.005681818181818232, "completions/max_length": 4096.0, "completions/max_terminated_length": 3360.0, "completions/mean_length": 588.5369262695312, "completions/mean_terminated_length": 568.4942626953125, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 1.9985418489355498, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 124229877.0, "reward": 0.6171875, "reward_std": 0.20387978851795197, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 214 }, { "clip_ratio/high_max": 0.0015850091485845041, "clip_ratio/high_mean": 0.0005638165930577088, "clip_ratio/low_mean": 0.0002923993365584465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008562159546272596, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3623.0, "completions/mean_length": 625.107177734375, "completions/mean_terminated_length": 553.9498901367188, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 2.0093321668124817, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0124, "num_tokens": 124804733.0, "reward": 0.5580357313156128, "reward_std": 0.23560000956058502, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 215 }, { "clip_ratio/high_max": 0.0014533565063175047, "clip_ratio/high_mean": 0.0004272621478094152, "clip_ratio/low_mean": 0.00027526743645012175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007025295690255007, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4010.0, "completions/mean_length": 679.6015625, "completions/mean_terminated_length": 577.5023193359375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 2.0186643336249634, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 125396840.0, "reward": 0.5212053656578064, "reward_std": 0.1908424347639084, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 216 }, { "clip_ratio/high_max": 0.0014981682179495692, "clip_ratio/high_mean": 0.0004227994944585589, "clip_ratio/low_mean": 0.0003687052671921265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007915047617643722, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 609.4631958007812, "completions/mean_terminated_length": 554.121337890625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 2.027996500437445, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 125979271.0, "reward": 0.5066964626312256, "reward_std": 0.2080136239528656, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 217 }, { "clip_ratio/high_max": 0.0016089115633803885, "clip_ratio/high_mean": 0.0004406281209412555, "clip_ratio/low_mean": 0.00037061431135043676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008112424502542126, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 646.9631958007812, "completions/mean_terminated_length": 608.0349731445312, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 2.037328667249927, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 126609302.0, "reward": 0.504464328289032, "reward_std": 0.23469960689544678, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 218 }, { "clip_ratio/high_max": 0.0019641154885903234, "clip_ratio/high_mean": 0.0006396210555976722, "clip_ratio/low_mean": 0.0003920840014188798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010317050582671072, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 527.4989013671875, "completions/mean_terminated_length": 495.3502502441406, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 2.046660834062409, "grad_norm": 0.1591796875, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 127136517.0, "reward": 0.6194196939468384, "reward_std": 0.25434422492980957, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 219 }, { "clip_ratio/high_max": 0.0013395062705967575, "clip_ratio/high_mean": 0.00038412570756918285, "clip_ratio/low_mean": 0.0004287898282200331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000812915544884163, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 628.989990234375, "completions/mean_terminated_length": 589.8589477539062, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 2.055993000874891, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 127753668.0, "reward": 0.470982164144516, "reward_std": 0.2315768003463745, "rewards/verify_math_reward/mean": 0.4709821343421936, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 220 }, { "clip_ratio/high_max": 0.0015597017663822044, "clip_ratio/high_mean": 0.0004319779341130925, "clip_ratio/low_mean": 0.00028511065738712205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007170885951381933, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 584.3147583007812, "completions/mean_terminated_length": 548.6831665039062, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 2.0653251676873725, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 128328262.0, "reward": 0.5546875, "reward_std": 0.21079309284687042, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 221 }, { "clip_ratio/high_max": 0.0016742968946346082, "clip_ratio/high_mean": 0.000601753146156625, "clip_ratio/low_mean": 0.0003211780654055474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009229312072420726, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 632.9631958007812, "completions/mean_terminated_length": 577.9943237304688, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.0746573344998542, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 128929701.0, "reward": 0.5412946939468384, "reward_std": 0.22469627857208252, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 222 }, { "clip_ratio/high_max": 0.0017082222111639567, "clip_ratio/high_mean": 0.0005120211583289347, "clip_ratio/low_mean": 0.0002879522712646576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007999734343684395, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 577.9163208007812, "completions/mean_terminated_length": 530.1595458984375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 2.083989501312336, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 129499482.0, "reward": 0.6104910969734192, "reward_std": 0.19723255932331085, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 223 }, { "clip_ratio/high_max": 0.0014899580464771134, "clip_ratio/high_mean": 0.0004448560117680245, "clip_ratio/low_mean": 0.00024159531301393145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006864513279651874, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2740.0, "completions/mean_length": 623.2277221679688, "completions/mean_terminated_length": 560.0863647460938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.0933216681248177, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 130078062.0, "reward": 0.5736607313156128, "reward_std": 0.2121456265449524, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 224 }, { "clip_ratio/high_max": 0.0013468493916661828, "clip_ratio/high_mean": 0.00040207266920333495, "clip_ratio/low_mean": 0.00037441720064634865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007764898682580679, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 623.4342041015625, "completions/mean_terminated_length": 584.2404174804688, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.1026538349372994, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 130694107.0, "reward": 0.4642857313156128, "reward_std": 0.23006653785705566, "rewards/verify_math_reward/mean": 0.4642857015132904, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 225 }, { "clip_ratio/high_max": 0.0015661860970794805, "clip_ratio/high_mean": 0.0005118552935527987, "clip_ratio/low_mean": 0.0003541565188243112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008660118146508466, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3569.0, "completions/mean_length": 640.6038208007812, "completions/mean_terminated_length": 601.6038818359375, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 2.111986001749781, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 131318976.0, "reward": 0.5502232313156128, "reward_std": 0.2358168661594391, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 226 }, { "clip_ratio/high_max": 0.001513285938926856, "clip_ratio/high_mean": 0.00047211629862431437, "clip_ratio/low_mean": 0.0002881199313833349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007602362429679488, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 618.193115234375, "completions/mean_terminated_length": 554.960205078125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 2.121318168562263, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 131890093.0, "reward": 0.6149553656578064, "reward_std": 0.2159428745508194, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 227 }, { "clip_ratio/high_max": 0.0015444342507180409, "clip_ratio/high_mean": 0.0004563076370232011, "clip_ratio/low_mean": 0.0003338523467846244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000790159988355299, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 602.9866333007812, "completions/mean_terminated_length": 563.5620727539062, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 2.130650335374745, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 132477817.0, "reward": 0.5491071939468384, "reward_std": 0.21147526800632477, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 228 }, { "clip_ratio/high_max": 0.0015167887777352007, "clip_ratio/high_mean": 0.00043480312615429284, "clip_ratio/low_mean": 0.0002541928790833481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000688996006601883, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3922.0, "completions/mean_length": 672.4877319335938, "completions/mean_terminated_length": 614.1986694335938, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.1399825021872267, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 133113862.0, "reward": 0.5223214626312256, "reward_std": 0.187350794672966, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 229 }, { "clip_ratio/high_max": 0.0012672489256146946, "clip_ratio/high_mean": 0.0003684084707629154, "clip_ratio/low_mean": 0.0003440370865064324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007124455569282873, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3863.0, "completions/mean_length": 586.4453125, "completions/mean_terminated_length": 534.7757568359375, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 2.1493146689997085, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 133671661.0, "reward": 0.5691964626312256, "reward_std": 0.20316441357135773, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 230 }, { "clip_ratio/high_max": 0.0013330569227036904, "clip_ratio/high_mean": 0.00041278249295828573, "clip_ratio/low_mean": 0.00036238569771285256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007751681851004832, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 569.5982666015625, "completions/mean_terminated_length": 545.82470703125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 2.15864683581219, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 134247181.0, "reward": 0.535714328289032, "reward_std": 0.2050095647573471, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 231 }, { "clip_ratio/high_max": 0.001488387457357021, "clip_ratio/high_mean": 0.00040857345675249235, "clip_ratio/low_mean": 0.0003010989770473316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007096724366419949, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 704.6707763671875, "completions/mean_terminated_length": 611.3314208984375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.167979002624672, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 134870638.0, "reward": 0.5, "reward_std": 0.20538821816444397, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 232 }, { "clip_ratio/high_max": 0.0013531004747164843, "clip_ratio/high_mean": 0.0004159661771154788, "clip_ratio/low_mean": 0.00030782140515839274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007237875747705402, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3693.0, "completions/mean_length": 649.9085083007812, "completions/mean_terminated_length": 607.0757446289062, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 2.1773111694371536, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 135499164.0, "reward": 0.5256696939468384, "reward_std": 0.20936980843544006, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 233 }, { "clip_ratio/high_max": 0.0016234310169238597, "clip_ratio/high_mean": 0.0005124375156810856, "clip_ratio/low_mean": 0.0003589206861533967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008713581937627168, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 589.177490234375, "completions/mean_terminated_length": 541.5735473632812, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 2.1866433362496354, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 136085179.0, "reward": 0.4810267984867096, "reward_std": 0.2249252200126648, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991899728775024, "step": 234 }, { "clip_ratio/high_max": 0.001494391443884524, "clip_ratio/high_mean": 0.0004722534943084611, "clip_ratio/low_mean": 0.0003999232769729133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008721767658244062, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 568.8326416015625, "completions/mean_terminated_length": 533.0439453125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 2.195975503062117, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 136645285.0, "reward": 0.574776828289032, "reward_std": 0.22564129531383514, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 235 }, { "clip_ratio/high_max": 0.001773574347680551, "clip_ratio/high_mean": 0.0005579647352078609, "clip_ratio/low_mean": 0.0003477517964256549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009057165216290741, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 592.9721069335938, "completions/mean_terminated_length": 537.3684692382812, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 2.205307669874599, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 137208676.0, "reward": 0.5803571939468384, "reward_std": 0.22774562239646912, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 236 }, { "clip_ratio/high_max": 0.0015008118916739477, "clip_ratio/high_mean": 0.0004158200515576027, "clip_ratio/low_mean": 0.0003367571597436836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007525772120970942, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 594.4631958007812, "completions/mean_terminated_length": 550.9412841796875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.214639836687081, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 137779667.0, "reward": 0.5178571939468384, "reward_std": 0.2336026430130005, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 237 }, { "clip_ratio/high_max": 0.0015419518131238874, "clip_ratio/high_mean": 0.0004725132537259924, "clip_ratio/low_mean": 0.0002834307499597344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007559440082332003, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 602.0223388671875, "completions/mean_terminated_length": 558.5943603515625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.2239720034995627, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 138355759.0, "reward": 0.5379464626312256, "reward_std": 0.19572414457798004, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 238 }, { "clip_ratio/high_max": 0.0018368209566688165, "clip_ratio/high_mean": 0.0006160234323715486, "clip_ratio/low_mean": 0.0002864181915356312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009024416303873295, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 600.765625, "completions/mean_terminated_length": 529.109375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 2.2333041703120444, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0158, "num_tokens": 138911509.0, "reward": 0.5636160969734192, "reward_std": 0.22559921443462372, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 239 }, { "clip_ratio/high_max": 0.0012299857717152918, "clip_ratio/high_mean": 0.00033535454531374853, "clip_ratio/low_mean": 0.00033555841582710855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006709129556838889, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3514.0, "completions/mean_length": 633.8582763671875, "completions/mean_terminated_length": 554.8139038085938, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.242636337124526, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 139472670.0, "reward": 0.606026828289032, "reward_std": 0.19080783426761627, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 240 }, { "clip_ratio/high_max": 0.0015930078670862713, "clip_ratio/high_mean": 0.0005159634893061593, "clip_ratio/low_mean": 0.00026747419946104856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007834376783648622, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 593.732177734375, "completions/mean_terminated_length": 550.201171875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.251968503937008, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 140048078.0, "reward": 0.5401785969734192, "reward_std": 0.2112131267786026, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 241 }, { "clip_ratio/high_max": 0.0018969822485814802, "clip_ratio/high_mean": 0.0005742547214140359, "clip_ratio/low_mean": 0.0002685978399767919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008428525607087067, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2051.0, "completions/mean_length": 595.7935791015625, "completions/mean_terminated_length": 552.2881469726562, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 2.2613006707494896, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 140616821.0, "reward": 0.5948660969734192, "reward_std": 0.23067595064640045, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 242 }, { "clip_ratio/high_max": 0.0016339275844075019, "clip_ratio/high_mean": 0.0005235973521848791, "clip_ratio/low_mean": 0.0003519884385241312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008755857907090103, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 586.5245971679688, "completions/mean_terminated_length": 526.7718505859375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.2706328375619713, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 141166563.0, "reward": 0.5189732313156128, "reward_std": 0.22890609502792358, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 243 }, { "clip_ratio/high_max": 0.0019044924256377271, "clip_ratio/high_mean": 0.0005128569018779672, "clip_ratio/low_mean": 0.00028219371199611487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007950506101224164, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3243.0, "completions/mean_length": 552.2421875, "completions/mean_terminated_length": 491.90582275390625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.279965004374453, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 141676996.0, "reward": 0.6205357313156128, "reward_std": 0.1829923838376999, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 244 }, { "clip_ratio/high_max": 0.001306582753386465, "clip_ratio/high_mean": 0.0003661511698282993, "clip_ratio/low_mean": 0.0003034111775832571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006695623519590299, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 645.771240234375, "completions/mean_terminated_length": 566.9988403320312, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 2.289297171186935, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": -0.0224, "num_tokens": 142256943.0, "reward": 0.5613839626312256, "reward_std": 0.1900942623615265, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 245 }, { "clip_ratio/high_max": 0.0014559138498952962, "clip_ratio/high_mean": 0.0004845738667427213, "clip_ratio/low_mean": 0.0004119984905628371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008965723764049471, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3074.0, "completions/mean_length": 608.5402221679688, "completions/mean_terminated_length": 573.1544189453125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 2.298629337999417, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 142858323.0, "reward": 0.4743303656578064, "reward_std": 0.2416456937789917, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 246 }, { "clip_ratio/high_max": 0.0017494164385425393, "clip_ratio/high_mean": 0.0005041158133280987, "clip_ratio/low_mean": 0.0003492841861998386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008534000066902081, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 605.7689819335938, "completions/mean_terminated_length": 558.3903198242188, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.3079615048118987, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 143447628.0, "reward": 0.5055803656578064, "reward_std": 0.20888377726078033, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 247 }, { "clip_ratio/high_max": 0.00194004879995191, "clip_ratio/high_mean": 0.0005720839756122587, "clip_ratio/low_mean": 0.00037973327562212944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000951817240093078, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3652.0, "completions/mean_length": 591.685302734375, "completions/mean_terminated_length": 532.0204467773438, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 2.3172936716243804, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 144007338.0, "reward": 0.5412946939468384, "reward_std": 0.24750901758670807, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 248 }, { "clip_ratio/high_max": 0.0018254909682582365, "clip_ratio/high_mean": 0.0005285317274683621, "clip_ratio/low_mean": 0.00025506711529033055, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007835988594706578, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3725.0, "completions/mean_length": 648.9799194335938, "completions/mean_terminated_length": 578.3120727539062, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 2.326625838436862, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 144605752.0, "reward": 0.5401785969734192, "reward_std": 0.21958816051483154, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 249 }, { "clip_ratio/high_max": 0.0017700926309771603, "clip_ratio/high_mean": 0.0006402511294254509, "clip_ratio/low_mean": 0.0003122798659660475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009525309833406936, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 592.958740234375, "completions/mean_terminated_length": 537.3548583984375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 2.335958005249344, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 145162763.0, "reward": 0.5993303656578064, "reward_std": 0.24664321541786194, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 250 }, { "clip_ratio/high_max": 0.0014909809515302186, "clip_ratio/high_mean": 0.0004996945815491927, "clip_ratio/low_mean": 0.0003681553966998763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008678499707457377, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 558.4620971679688, "completions/mean_terminated_length": 522.5681762695312, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.3452901720618256, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 145718121.0, "reward": 0.520089328289032, "reward_std": 0.22703349590301514, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 251 }, { "clip_ratio/high_max": 0.0018064291252812836, "clip_ratio/high_mean": 0.0005305278225478105, "clip_ratio/low_mean": 0.00040231651246358524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009328443215963489, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 547.921875, "completions/mean_terminated_length": 511.9210510253906, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 2.3546223388743073, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0159, "num_tokens": 146257523.0, "reward": 0.5368303656578064, "reward_std": 0.22950340807437897, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 252 }, { "clip_ratio/high_max": 0.0017361845375489793, "clip_ratio/high_mean": 0.0005629460900991035, "clip_ratio/low_mean": 0.00030345223558470025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008663983217047644, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 627.9699096679688, "completions/mean_terminated_length": 556.871337890625, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 2.363954505686789, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 146831592.0, "reward": 0.582589328289032, "reward_std": 0.20125621557235718, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 253 }, { "clip_ratio/high_max": 0.0014099233130764333, "clip_ratio/high_mean": 0.000422317779793957, "clip_ratio/low_mean": 0.00035918456478611915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007815023473085603, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3229.0, "completions/mean_length": 655.5703125, "completions/mean_terminated_length": 596.9932250976562, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.3732866724992707, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 147439999.0, "reward": 0.4799107313156128, "reward_std": 0.22184516489505768, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 254 }, { "clip_ratio/high_max": 0.00166749171421543, "clip_ratio/high_mean": 0.0005249900411854469, "clip_ratio/low_mean": 0.000330110839399822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008551008886570344, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 592.224365234375, "completions/mean_terminated_length": 532.5687255859375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 2.382618839311753, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 147994224.0, "reward": 0.6082589626312256, "reward_std": 0.20287340879440308, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 255 }, { "clip_ratio/high_max": 0.0015114786774574895, "clip_ratio/high_mean": 0.0004187021561392612, "clip_ratio/low_mean": 0.0003389281773706898, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007576303396490403, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 636.8147583007812, "completions/mean_terminated_length": 593.8192138671875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 2.3919510061242346, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 148613450.0, "reward": 0.520089328289032, "reward_std": 0.216995969414711, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 256 }, { "clip_ratio/high_max": 0.0016861584958860476, "clip_ratio/high_mean": 0.000553906721279418, "clip_ratio/low_mean": 0.00030574112531667197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008596478355684667, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3557.0, "completions/mean_length": 609.6261596679688, "completions/mean_terminated_length": 570.2765502929688, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.4012831729367163, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 149207227.0, "reward": 0.5580357313156128, "reward_std": 0.22966887056827545, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 257 }, { "clip_ratio/high_max": 0.0018101280911650974, "clip_ratio/high_mean": 0.000577292491470871, "clip_ratio/low_mean": 0.0003695304706070601, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009468229518461158, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 548.0592041015625, "completions/mean_terminated_length": 524.1404418945312, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 2.410615339749198, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 149758616.0, "reward": 0.5814732313156128, "reward_std": 0.25389915704727173, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 258 }, { "clip_ratio/high_max": 0.00144162098695233, "clip_ratio/high_mean": 0.00045632900082637207, "clip_ratio/low_mean": 0.0002676564937473813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007239855031002662, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 563.9475708007812, "completions/mean_terminated_length": 536.1361083984375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.41994750656168, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 150326777.0, "reward": 0.5959821939468384, "reward_std": 0.19208095967769623, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 259 }, { "clip_ratio/high_max": 0.001589305682500708, "clip_ratio/high_mean": 0.0004874247865700454, "clip_ratio/low_mean": 0.0003013444198813886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007887692086114839, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2764.0, "completions/mean_length": 617.9017944335938, "completions/mean_terminated_length": 554.6636352539062, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 2.4292796733741615, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 150906513.0, "reward": 0.4988839626312256, "reward_std": 0.22274348139762878, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 260 }, { "clip_ratio/high_max": 0.0017504588249721564, "clip_ratio/high_mean": 0.0004910838752039126, "clip_ratio/low_mean": 0.0002576393908384489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007487232687708456, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 596.943115234375, "completions/mean_terminated_length": 557.4503784179688, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 2.4386118401866432, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 151495694.0, "reward": 0.5558035969734192, "reward_std": 0.20238415896892548, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 261 }, { "clip_ratio/high_max": 0.0016746958863222972, "clip_ratio/high_mean": 0.0004377821796879289, "clip_ratio/low_mean": 0.00027589590604293335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007136780805012677, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 592.3359375, "completions/mean_terminated_length": 548.78759765625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.447944006999125, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 152070211.0, "reward": 0.520089328289032, "reward_std": 0.20230887830257416, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 262 }, { "clip_ratio/high_max": 0.001553309908558731, "clip_ratio/high_mean": 0.0005235942248873471, "clip_ratio/low_mean": 0.0003422477983576755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000865842013809015, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 582.9564819335938, "completions/mean_terminated_length": 531.2355346679688, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 2.457276173811607, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0148, "num_tokens": 152624100.0, "reward": 0.5736607313156128, "reward_std": 0.20415011048316956, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 263 }, { "clip_ratio/high_max": 0.0015362589047072106, "clip_ratio/high_mean": 0.0004781124704322792, "clip_ratio/low_mean": 0.0003200235181566313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007981359863151738, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3408.0, "completions/mean_length": 593.8839721679688, "completions/mean_terminated_length": 546.3439331054688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 2.466608340624089, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 153201300.0, "reward": 0.5379464626312256, "reward_std": 0.2363481968641281, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 264 }, { "clip_ratio/high_max": 0.0016864011940924684, "clip_ratio/high_mean": 0.0005238697140157456, "clip_ratio/low_mean": 0.00035080927580111165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008746790044824593, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 624.6004638671875, "completions/mean_terminated_length": 565.4960327148438, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 2.4759405074365706, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 153788246.0, "reward": 0.546875, "reward_std": 0.20685499906539917, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 265 }, { "clip_ratio/high_max": 0.0018087032476614695, "clip_ratio/high_mean": 0.0006073099975765217, "clip_ratio/low_mean": 0.00028049693025877787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008878069238562603, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 583.0424194335938, "completions/mean_terminated_length": 539.3785400390625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.4852726742490523, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0251, "num_tokens": 154348940.0, "reward": 0.6116071939468384, "reward_std": 0.23770253360271454, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 266 }, { "clip_ratio/high_max": 0.0017296092755714199, "clip_ratio/high_mean": 0.0006053657261873013, "clip_ratio/low_mean": 0.0003528901040681376, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009582558423062437, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 619.489990234375, "completions/mean_terminated_length": 568.306884765625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.494604841061534, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 154936963.0, "reward": 0.5424107313156128, "reward_std": 0.2424006164073944, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 267 }, { "clip_ratio/high_max": 0.0015104042849998223, "clip_ratio/high_mean": 0.0004024213969842094, "clip_ratio/low_mean": 0.00026178848742119953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006642098906013416, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 668.2701416015625, "completions/mean_terminated_length": 601.9772338867188, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.5039370078740157, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 155565077.0, "reward": 0.5145089626312256, "reward_std": 0.19558146595954895, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 268 }, { "clip_ratio/high_max": 0.0015643926399206975, "clip_ratio/high_mean": 0.0005585485578194493, "clip_ratio/low_mean": 0.0002530500935336022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000811598647487699, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 594.646240234375, "completions/mean_terminated_length": 551.1265869140625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 2.5132691746864975, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 156137664.0, "reward": 0.5881696939468384, "reward_std": 0.22582674026489258, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 269 }, { "clip_ratio/high_max": 0.0015179792881099274, "clip_ratio/high_mean": 0.0004894791999277004, "clip_ratio/low_mean": 0.0003458185989302365, "clip_ratio/low_min": 1.2322555448918138e-05, "clip_ratio/region_mean": 0.0008352977865797584, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3682.0, "completions/mean_length": 581.9085083007812, "completions/mean_terminated_length": 546.2525024414062, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 2.522601341498979, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 156715966.0, "reward": 0.5491071939468384, "reward_std": 0.22838753461837769, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 270 }, { "clip_ratio/high_max": 0.001546673691336764, "clip_ratio/high_mean": 0.0004874001408552431, "clip_ratio/low_mean": 0.0003322237616885104, "clip_ratio/low_min": 9.648039849707857e-06, "clip_ratio/region_mean": 0.0008196238959499169, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3253.0, "completions/mean_length": 640.7533569335938, "completions/mean_terminated_length": 605.6944580078125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 2.531933508311461, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0059, "num_tokens": 157364217.0, "reward": 0.4966517984867096, "reward_std": 0.21767067909240723, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 271 }, { "clip_ratio/high_max": 0.0017599499005882535, "clip_ratio/high_mean": 0.0005300088896547095, "clip_ratio/low_mean": 0.00037613722565765784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009061461050805519, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 600.2355346679688, "completions/mean_terminated_length": 548.7689819335938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.5412656751239426, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 157943868.0, "reward": 0.5580357313156128, "reward_std": 0.2154243439435959, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 272 }, { "clip_ratio/high_max": 0.0017446381225454388, "clip_ratio/high_mean": 0.0005404869284575398, "clip_ratio/low_mean": 0.00038403678422582743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009245237024515518, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2131.0, "completions/mean_length": 592.7109375, "completions/mean_terminated_length": 557.16455078125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.5505978419364244, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 158520209.0, "reward": 0.5502232313156128, "reward_std": 0.25667428970336914, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 273 }, { "clip_ratio/high_max": 0.0017314323304162826, "clip_ratio/high_mean": 0.000502947056247649, "clip_ratio/low_mean": 0.0003295998160410818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008325468934344826, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3852.0, "completions/mean_length": 747.3739013671875, "completions/mean_terminated_length": 635.3667602539062, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 2.5599300087489065, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 159155880.0, "reward": 0.4910714626312256, "reward_std": 0.22075042128562927, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 274 }, { "clip_ratio/high_max": 0.0017812741334637394, "clip_ratio/high_mean": 0.0005633969503833214, "clip_ratio/low_mean": 0.0003263460660036799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008897430261640693, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 623.4408569335938, "completions/mean_terminated_length": 572.3159790039062, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.5692621755613883, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0221, "num_tokens": 159748315.0, "reward": 0.527901828289032, "reward_std": 0.24987341463565826, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 275 }, { "clip_ratio/high_max": 0.0015368411059171194, "clip_ratio/high_mean": 0.0005303337151190135, "clip_ratio/low_mean": 0.00035316439902999264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008834981335894554, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2727.0, "completions/mean_length": 619.3694458007812, "completions/mean_terminated_length": 564.184814453125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.57859434237387, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 160343198.0, "reward": 0.5792410969734192, "reward_std": 0.23514537513256073, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 276 }, { "clip_ratio/high_max": 0.0014041623089724453, "clip_ratio/high_mean": 0.0003957954627367144, "clip_ratio/low_mean": 0.00030164567294832523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006974411371629685, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 586.8873291015625, "completions/mean_terminated_length": 531.1870727539062, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 2.5879265091863517, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 160898641.0, "reward": 0.5546875, "reward_std": 0.19204705953598022, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 277 }, { "clip_ratio/high_max": 0.0016594494354649214, "clip_ratio/high_mean": 0.0005165247798686323, "clip_ratio/low_mean": 0.0003543018701748224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008708266668691067, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 611.4765625, "completions/mean_terminated_length": 580.08447265625, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 2.5972586759988334, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 161506348.0, "reward": 0.4910714626312256, "reward_std": 0.23747360706329346, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 278 }, { "clip_ratio/high_max": 0.0018392898800811963, "clip_ratio/high_mean": 0.0004801133568435034, "clip_ratio/low_mean": 0.00031793066432328487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007980440245773934, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3392.0, "completions/mean_length": 605.0592041015625, "completions/mean_terminated_length": 533.4909057617188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 2.606590842811315, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 162061777.0, "reward": 0.5892857313156128, "reward_std": 0.21335633099079132, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 279 }, { "clip_ratio/high_max": 0.00174964958387136, "clip_ratio/high_mean": 0.00047927262903613155, "clip_ratio/low_mean": 0.00037942975109217514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000858702378536691, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 587.880615234375, "completions/mean_terminated_length": 548.2855834960938, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 2.615923009623797, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 162631942.0, "reward": 0.515625, "reward_std": 0.2096005529165268, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 280 }, { "clip_ratio/high_max": 0.001631348744922434, "clip_ratio/high_mean": 0.0004491139058018234, "clip_ratio/low_mean": 0.0002772787868252635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007263926991072367, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 571.825927734375, "completions/mean_terminated_length": 515.8866577148438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 2.625255176436279, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 163167874.0, "reward": 0.6238839626312256, "reward_std": 0.21635474264621735, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 281 }, { "clip_ratio/high_max": 0.0016738859203542233, "clip_ratio/high_mean": 0.0005313563602840077, "clip_ratio/low_mean": 0.00029245962582535867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008238159780376009, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 576.1864013671875, "completions/mean_terminated_length": 532.4373168945312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.6345873432487608, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 163723745.0, "reward": 0.613839328289032, "reward_std": 0.21819807589054108, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 282 }, { "clip_ratio/high_max": 0.0019118020027235616, "clip_ratio/high_mean": 0.0005473132416682347, "clip_ratio/low_mean": 0.0004529294806161488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010002427243307466, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3604.0, "completions/mean_length": 578.46875, "completions/mean_terminated_length": 526.6817626953125, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 2.6439195100612425, "grad_norm": 0.1513671875, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 164269317.0, "reward": 0.5725446939468384, "reward_std": 0.2469463050365448, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 283 }, { "clip_ratio/high_max": 0.0017921685775945662, "clip_ratio/high_mean": 0.0006251538120523037, "clip_ratio/low_mean": 0.0003910708369403437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001016224638078711, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 572.0592041015625, "completions/mean_terminated_length": 520.1777954101562, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.653251676873724, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 164823890.0, "reward": 0.5970982313156128, "reward_std": 0.2508198022842407, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 284 }, { "clip_ratio/high_max": 0.0013512569712474942, "clip_ratio/high_mean": 0.00037087427836013376, "clip_ratio/low_mean": 0.00030325797808927746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006741322567904717, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 607.693115234375, "completions/mean_terminated_length": 560.3405151367188, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 2.662583843686206, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 165408799.0, "reward": 0.5446428656578064, "reward_std": 0.1853991150856018, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 285 }, { "clip_ratio/high_max": 0.0012537253724076436, "clip_ratio/high_mean": 0.0003354836278504081, "clip_ratio/low_mean": 0.00029683948127967597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006323231064015999, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3307.0, "completions/mean_length": 684.5391235351562, "completions/mean_terminated_length": 602.6640014648438, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 2.6719160104986877, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 166025090.0, "reward": 0.4285714626312256, "reward_std": 0.2080891877412796, "rewards/verify_math_reward/mean": 0.4285714328289032, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 286 }, { "clip_ratio/high_max": 0.0015210921355901519, "clip_ratio/high_mean": 0.0004264088308900682, "clip_ratio/low_mean": 0.0003292609922027623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007556698219559621, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3677.0, "completions/mean_length": 643.9553833007812, "completions/mean_terminated_length": 573.1845092773438, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.6812481773111694, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 166622962.0, "reward": 0.4988839626312256, "reward_std": 0.23044700920581818, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 287 }, { "clip_ratio/high_max": 0.0014823366091150092, "clip_ratio/high_mean": 0.000417918061657474, "clip_ratio/low_mean": 0.0003382189736385044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007561370305211312, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 620.40625, "completions/mean_terminated_length": 581.1783447265625, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.690580344123651, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 167222734.0, "reward": 0.5379464626312256, "reward_std": 0.20869651436805725, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 288 }, { "clip_ratio/high_max": 0.0015053520455694525, "clip_ratio/high_mean": 0.00043842528748427867, "clip_ratio/low_mean": 0.0003198034838760577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007582287707919022, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 627.0245971679688, "completions/mean_terminated_length": 567.96142578125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 2.699912510936133, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 167807396.0, "reward": 0.5022321939468384, "reward_std": 0.22823528945446014, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 289 }, { "clip_ratio/high_max": 0.0016437880767625757, "clip_ratio/high_mean": 0.0005367208000279788, "clip_ratio/low_mean": 0.0003510263488806231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008877471500454703, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 576.654052734375, "completions/mean_terminated_length": 532.9107666015625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 2.7092446777486145, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 168366470.0, "reward": 0.5837053656578064, "reward_std": 0.23623982071876526, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 290 }, { "clip_ratio/high_max": 0.0017529187853142503, "clip_ratio/high_mean": 0.00048686784452911525, "clip_ratio/low_mean": 0.00035784269800842594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008447105346931494, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2241.0, "completions/mean_length": 580.7421875, "completions/mean_terminated_length": 553.06298828125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.7185768445610963, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 168948439.0, "reward": 0.559151828289032, "reward_std": 0.20779791474342346, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 291 }, { "clip_ratio/high_max": 0.0014450591979766614, "clip_ratio/high_mean": 0.00043080291516162106, "clip_ratio/low_mean": 0.0003529568850808573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007837598013793468, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 639.0078125, "completions/mean_terminated_length": 556.0399780273438, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 2.7279090113735784, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 169537342.0, "reward": 0.4888392984867096, "reward_std": 0.23206281661987305, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 292 }, { "clip_ratio/high_max": 0.0015294742570404196, "clip_ratio/high_mean": 0.0005287959324959957, "clip_ratio/low_mean": 0.000296823709390992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008256196379079483, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 701.5647583007812, "completions/mean_terminated_length": 604.1354370117188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.73724117818606, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 170154504.0, "reward": 0.5055803656578064, "reward_std": 0.2500934600830078, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 293 }, { "clip_ratio/high_max": 0.0019843373520416208, "clip_ratio/high_mean": 0.0005925050747919158, "clip_ratio/low_mean": 0.00025544197194449225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008479470420752477, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 610.2779541015625, "completions/mean_terminated_length": 550.9296264648438, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 2.746573344998542, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 170740705.0, "reward": 0.5066964626312256, "reward_std": 0.22233231365680695, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 294 }, { "clip_ratio/high_max": 0.0014366603463713545, "clip_ratio/high_mean": 0.0004662901551455434, "clip_ratio/low_mean": 0.00031543318300464307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000781723333602713, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3181.0, "completions/mean_length": 591.6484375, "completions/mean_terminated_length": 548.091552734375, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 2.7559055118110236, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 171311686.0, "reward": 0.5892857313156128, "reward_std": 0.22834154963493347, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 295 }, { "clip_ratio/high_max": 0.0017532859492348507, "clip_ratio/high_mean": 0.0005667617406288628, "clip_ratio/low_mean": 0.0003789789525399101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009457406877118046, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3122.0, "completions/mean_length": 601.7902221679688, "completions/mean_terminated_length": 562.3521728515625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 2.7652376786235053, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0161, "num_tokens": 171898594.0, "reward": 0.5424107313156128, "reward_std": 0.25690504908561707, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 296 }, { "clip_ratio/high_max": 0.0013215181261330144, "clip_ratio/high_mean": 0.00039051591647876194, "clip_ratio/low_mean": 0.00028563219234456483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006761481117791845, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3446.0, "completions/mean_length": 607.9944458007812, "completions/mean_terminated_length": 560.6459350585938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 2.774569845435987, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 172482493.0, "reward": 0.494419664144516, "reward_std": 0.2008771300315857, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 297 }, { "clip_ratio/high_max": 0.0014070765564611065, "clip_ratio/high_mean": 0.0004336246713592118, "clip_ratio/low_mean": 0.00030252068540903565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007361453563135001, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 580.5770263671875, "completions/mean_terminated_length": 540.8995361328125, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 2.783902012248469, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 173054002.0, "reward": 0.5189732313156128, "reward_std": 0.20038999617099762, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 298 }, { "clip_ratio/high_max": 0.0015851540683797793, "clip_ratio/high_mean": 0.0005031180047581074, "clip_ratio/low_mean": 0.0003453926424299425, "clip_ratio/low_min": 8.398280442634132e-06, "clip_ratio/region_mean": 0.0008485106491207262, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1963.0, "completions/mean_length": 601.7824096679688, "completions/mean_terminated_length": 542.2894897460938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.793234179060951, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 173619591.0, "reward": 0.5133928656578064, "reward_std": 0.23067525029182434, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 299 }, { "clip_ratio/high_max": 0.0016521747693332145, "clip_ratio/high_mean": 0.0004650787705031689, "clip_ratio/low_mean": 0.0003833997711808479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008484785412292695, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3692.0, "completions/mean_length": 659.65625, "completions/mean_terminated_length": 593.19677734375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.8025663458734327, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0187, "num_tokens": 174224971.0, "reward": 0.5524553656578064, "reward_std": 0.2390919178724289, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 300 }, { "clip_ratio/high_max": 0.001523267292213859, "clip_ratio/high_mean": 0.0004439122083113034, "clip_ratio/low_mean": 0.00029721465909915423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007411268616124289, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 612.0513916015625, "completions/mean_terminated_length": 536.5723876953125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.8118985126859144, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 174776193.0, "reward": 0.5368303656578064, "reward_std": 0.20902322232723236, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 301 }, { "clip_ratio/high_max": 0.0013422923657344654, "clip_ratio/high_mean": 0.00042776693385349063, "clip_ratio/low_mean": 0.0003904350706989135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008182020037565962, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 616.65625, "completions/mean_terminated_length": 573.41015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.821230679498396, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0149, "num_tokens": 175367349.0, "reward": 0.5, "reward_std": 0.21286533772945404, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 302 }, { "clip_ratio/high_max": 0.0016865780526131857, "clip_ratio/high_mean": 0.0005325157746938203, "clip_ratio/low_mean": 0.0002991496540971639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008316654452755756, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 659.0714721679688, "completions/mean_terminated_length": 600.553955078125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 2.830562846310878, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 175986301.0, "reward": 0.520089328289032, "reward_std": 0.2272149920463562, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 303 }, { "clip_ratio/high_max": 0.0018782893257593969, "clip_ratio/high_mean": 0.0005835897909491905, "clip_ratio/low_mean": 0.0004582951835345739, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010418849533380126, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 611.466552734375, "completions/mean_terminated_length": 560.1653442382812, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 2.8398950131233596, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 176571951.0, "reward": 0.5714285969734192, "reward_std": 0.2577284872531891, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 304 }, { "clip_ratio/high_max": 0.001666442349232966, "clip_ratio/high_mean": 0.0005039190423303808, "clip_ratio/low_mean": 0.00028067684303323404, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007845958907637396, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3629.0, "completions/mean_length": 607.333740234375, "completions/mean_terminated_length": 547.935302734375, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 2.8492271799358413, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 177153946.0, "reward": 0.5446428656578064, "reward_std": 0.2236124873161316, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 305 }, { "clip_ratio/high_max": 0.0017134395202447195, "clip_ratio/high_mean": 0.0004923922685975413, "clip_ratio/low_mean": 0.0003737313411420473, "clip_ratio/low_min": 1.3075314200250432e-05, "clip_ratio/region_mean": 0.0008661236079205992, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 589.0379638671875, "completions/mean_terminated_length": 529.3280639648438, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 2.858559346748323, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 177703420.0, "reward": 0.5558035969734192, "reward_std": 0.2413061559200287, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 306 }, { "clip_ratio/high_max": 0.0014759283094463171, "clip_ratio/high_mean": 0.0004467450282845675, "clip_ratio/low_mean": 0.00032371382712881314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007704588579144911, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3726.0, "completions/mean_length": 579.2489013671875, "completions/mean_terminated_length": 543.56591796875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 2.8678915135608047, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 178269123.0, "reward": 0.512276828289032, "reward_std": 0.2036968618631363, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 307 }, { "clip_ratio/high_max": 0.00149100732687657, "clip_ratio/high_mean": 0.00047716773906358867, "clip_ratio/low_mean": 0.0002795190918050139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007566868334833998, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 575.779052734375, "completions/mean_terminated_length": 532.0248413085938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.8772236803732865, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 178826773.0, "reward": 0.5859375, "reward_std": 0.20287089049816132, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 308 }, { "clip_ratio/high_max": 0.0019092849361186381, "clip_ratio/high_mean": 0.0005840047251695069, "clip_ratio/low_mean": 0.000348398102801184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009324028178525623, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 591.1138916015625, "completions/mean_terminated_length": 539.5130004882812, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 2.886555847185768, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 179390299.0, "reward": 0.5022321939468384, "reward_std": 0.23154105246067047, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 309 }, { "clip_ratio/high_max": 0.0016030447877710685, "clip_ratio/high_mean": 0.0005661971076733607, "clip_ratio/low_mean": 0.00038362684472303954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009498239314780221, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3257.0, "completions/mean_length": 623.732177734375, "completions/mean_terminated_length": 592.450439453125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.8958880139982504, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 180001731.0, "reward": 0.4810267984867096, "reward_std": 0.2676451504230499, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 310 }, { "clip_ratio/high_max": 0.0015500138479183079, "clip_ratio/high_mean": 0.0004708632347956154, "clip_ratio/low_mean": 0.00030234170117182657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007732049239166372, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 570.9799194335938, "completions/mean_terminated_length": 531.1941528320312, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 2.905220180810732, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0169, "num_tokens": 180571553.0, "reward": 0.606026828289032, "reward_std": 0.20308955013751984, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 311 }, { "clip_ratio/high_max": 0.0016935139556153445, "clip_ratio/high_mean": 0.0004774006470142922, "clip_ratio/low_mean": 0.00027716780800801644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007545684447904932, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 585.8671875, "completions/mean_terminated_length": 546.2494506835938, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 2.914552347623214, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 181145874.0, "reward": 0.5803571939468384, "reward_std": 0.2156084030866623, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 312 }, { "clip_ratio/high_max": 0.0015024951590021374, "clip_ratio/high_mean": 0.0003828913951338109, "clip_ratio/low_mean": 0.00031090754453089176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006937989546713652, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 664.521240234375, "completions/mean_terminated_length": 598.1558227539062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.9238845144356955, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 181765309.0, "reward": 0.5189732313156128, "reward_std": 0.1989218294620514, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 313 }, { "clip_ratio/high_max": 0.001601365249371156, "clip_ratio/high_mean": 0.000455309519338698, "clip_ratio/low_mean": 0.00038884761158897163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008441571353614563, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3978.0, "completions/mean_length": 650.693115234375, "completions/mean_terminated_length": 592.032958984375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 2.9332166812481772, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 182376362.0, "reward": 0.4799107313156128, "reward_std": 0.25197917222976685, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 314 }, { "clip_ratio/high_max": 0.0015134224504436133, "clip_ratio/high_mean": 0.0004907421830466774, "clip_ratio/low_mean": 0.00037144607688333053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008621882589068264, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 599.0692138671875, "completions/mean_terminated_length": 543.5623779296875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.942548848060659, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 182950496.0, "reward": 0.5111607313156128, "reward_std": 0.23799677193164825, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 315 }, { "clip_ratio/high_max": 0.0018595209185150452, "clip_ratio/high_mean": 0.0005672425918419322, "clip_ratio/low_mean": 0.0004028926704222613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009701352637421223, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 561.0502319335938, "completions/mean_terminated_length": 533.2160034179688, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 2.9518810148731407, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 183519597.0, "reward": 0.5613839626312256, "reward_std": 0.23755210638046265, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 316 }, { "clip_ratio/high_max": 0.0015648174157831818, "clip_ratio/high_mean": 0.000499258840022776, "clip_ratio/low_mean": 0.0003862829264562606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008855417518134345, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 655.7176513671875, "completions/mean_terminated_length": 601.1099853515625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.961213181685623, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 184137088.0, "reward": 0.4743303656578064, "reward_std": 0.2368360310792923, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 317 }, { "clip_ratio/high_max": 0.001962169610123965, "clip_ratio/high_mean": 0.0005945045068074251, "clip_ratio/low_mean": 0.00036597634982626914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009604808565200074, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 570.4230346679688, "completions/mean_terminated_length": 522.5645141601562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.9705453484981046, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 184688187.0, "reward": 0.5569196939468384, "reward_std": 0.2202625721693039, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 318 }, { "clip_ratio/high_max": 0.0014619004105043132, "clip_ratio/high_mean": 0.0004460722163912578, "clip_ratio/low_mean": 0.00025319012502222904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006992623475525761, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3325.0, "completions/mean_length": 603.9296875, "completions/mean_terminated_length": 548.5, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.9798775153105863, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 185261748.0, "reward": 0.5479910969734192, "reward_std": 0.20369574427604675, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 319 }, { "clip_ratio/high_max": 0.001867756514911889, "clip_ratio/high_mean": 0.000636332901649439, "clip_ratio/low_mean": 0.00027814046939056425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009144733539869776, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 596.3292846679688, "completions/mean_terminated_length": 552.8305053710938, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 2.989209682123068, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 185847707.0, "reward": 0.5837053656578064, "reward_std": 0.24923540651798248, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 320 }, { "clip_ratio/high_max": 0.0017106063587561948, "clip_ratio/high_mean": 0.000480181593047746, "clip_ratio/low_mean": 0.00027902893793907424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007592105284857098, "completions/clipped_ratio": 0.011363636363636354, "completions/max_length": 4096.0, "completions/max_terminated_length": 2428.0, "completions/mean_length": 591.7415161132812, "completions/mean_terminated_length": 551.462646484375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 2.9985418489355498, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 186435154.0, "reward": 0.582589328289032, "reward_std": 0.21676844358444214, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 321 }, { "clip_ratio/high_max": 0.0014145847071631579, "clip_ratio/high_mean": 0.00041427204268984497, "clip_ratio/low_mean": 0.00032360194654756924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007378739865089301, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 694.2511596679688, "completions/mean_terminated_length": 632.401123046875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 3.0093321668124817, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 187081955.0, "reward": 0.4564732313156128, "reward_std": 0.2112138420343399, "rewards/verify_math_reward/mean": 0.4564732015132904, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 322 }, { "clip_ratio/high_max": 0.001394630447975942, "clip_ratio/high_mean": 0.00043612389333702595, "clip_ratio/low_mean": 0.00036425040411813825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008003742923392565, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3180.0, "completions/mean_length": 638.3192138671875, "completions/mean_terminated_length": 599.29345703125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 3.0186643336249634, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 187695425.0, "reward": 0.5401785969734192, "reward_std": 0.24562332034111023, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 323 }, { "clip_ratio/high_max": 0.0017467351999584935, "clip_ratio/high_mean": 0.0005857529299646558, "clip_ratio/low_mean": 0.00039468116983698565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009804340907066944, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 589.4152221679688, "completions/mean_terminated_length": 565.7752685546875, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 3.027996500437445, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 188288629.0, "reward": 0.5022321939468384, "reward_std": 0.25806480646133423, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 324 }, { "clip_ratio/high_max": 0.0014759719633730128, "clip_ratio/high_mean": 0.00043675317124325375, "clip_ratio/low_mean": 0.0002437118456555254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006804650251979183, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 593.7053833007812, "completions/mean_terminated_length": 542.1427001953125, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 3.037328667249927, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 188857933.0, "reward": 0.543526828289032, "reward_std": 0.19704709947109222, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 325 }, { "clip_ratio/high_max": 0.0016783677301646094, "clip_ratio/high_mean": 0.0005184165333957935, "clip_ratio/low_mean": 0.00023275836247194093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007511748958677344, "completions/clipped_ratio": 0.0022321428571429047, "completions/max_length": 4096.0, "completions/max_terminated_length": 2205.0, "completions/mean_length": 545.5535888671875, "completions/mean_terminated_length": 537.6107177734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.046660834062409, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 189418821.0, "reward": 0.5926339626312256, "reward_std": 0.21091073751449585, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 326 }, { "clip_ratio/high_max": 0.0015624871757609071, "clip_ratio/high_mean": 0.0004476380358937604, "clip_ratio/low_mean": 0.00041369982841388264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008613378604422905, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 631.15625, "completions/mean_terminated_length": 580.1449584960938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.055993000874891, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 190023241.0, "reward": 0.543526828289032, "reward_std": 0.22337539494037628, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 327 }, { "clip_ratio/high_max": 0.0014935850049369037, "clip_ratio/high_mean": 0.0004928659504912503, "clip_ratio/low_mean": 0.0002954453170787019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007883112648414681, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 646.3527221679688, "completions/mean_terminated_length": 551.408203125, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 3.0653251676873725, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 190596821.0, "reward": 0.5491071939468384, "reward_std": 0.21553170680999756, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 328 }, { "clip_ratio/high_max": 0.0013526212587748887, "clip_ratio/high_mean": 0.00037775289058572525, "clip_ratio/low_mean": 0.00024532223949336185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000623075127350603, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 570.247802734375, "completions/mean_terminated_length": 530.4537353515625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.0746573344998542, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 191157611.0, "reward": 0.5814732313156128, "reward_std": 0.19223138689994812, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 329 }, { "clip_ratio/high_max": 0.001386022498991224, "clip_ratio/high_mean": 0.00037963793920425815, "clip_ratio/low_mean": 0.00035208107738071703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000731719011128007, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 642.7254638671875, "completions/mean_terminated_length": 575.9385375976562, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 3.083989501312336, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 191759597.0, "reward": 0.4676339626312256, "reward_std": 0.21019576489925385, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 330 }, { "clip_ratio/high_max": 0.0016220136021729559, "clip_ratio/high_mean": 0.000503526099919327, "clip_ratio/low_mean": 0.0004214132576407792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009249393442587461, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 606.6038208007812, "completions/mean_terminated_length": 539.1182861328125, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 3.0933216681248177, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.023, "num_tokens": 192320258.0, "reward": 0.5569196939468384, "reward_std": 0.2525743842124939, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 331 }, { "clip_ratio/high_max": 0.0015725383964309003, "clip_ratio/high_mean": 0.0004890870800409175, "clip_ratio/low_mean": 0.0002551917355049227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007442788146363455, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3548.0, "completions/mean_length": 590.8939819335938, "completions/mean_terminated_length": 547.3276977539062, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 3.1026538349372994, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 192891859.0, "reward": 0.5870535969734192, "reward_std": 0.19257839024066925, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 332 }, { "clip_ratio/high_max": 0.0016490092166350223, "clip_ratio/high_mean": 0.00044072518517168646, "clip_ratio/low_mean": 0.0002751491902017733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007158743751460861, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 568.625, "completions/mean_terminated_length": 508.56756591796875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 3.111986001749781, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 193435235.0, "reward": 0.6082589626312256, "reward_std": 0.1978398710489273, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 333 }, { "clip_ratio/high_max": 0.001815153741517861, "clip_ratio/high_mean": 0.0004776723098984803, "clip_ratio/low_mean": 0.00034385452943297423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008215268439926149, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2269.0, "completions/mean_length": 628.724365234375, "completions/mean_terminated_length": 557.6412353515625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 3.121318168562263, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 194025676.0, "reward": 0.5479910969734192, "reward_std": 0.20440296828746796, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 334 }, { "clip_ratio/high_max": 0.001681742455730273, "clip_ratio/high_mean": 0.0004999326515644498, "clip_ratio/low_mean": 0.0003150533422058288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008149859904733603, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 553.552490234375, "completions/mean_terminated_length": 505.4649658203125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.130650335374745, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 194554051.0, "reward": 0.606026828289032, "reward_std": 0.2215416580438614, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 335 }, { "clip_ratio/high_max": 0.0015570206323900493, "clip_ratio/high_mean": 0.00044933508752365015, "clip_ratio/low_mean": 0.00035593529878497066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000805270383352763, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2289.0, "completions/mean_length": 592.3214721679688, "completions/mean_terminated_length": 564.7334594726562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.1399825021872267, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 195146435.0, "reward": 0.486607164144516, "reward_std": 0.22462141513824463, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 336 }, { "clip_ratio/high_max": 0.001941438624271541, "clip_ratio/high_mean": 0.0005635270540551574, "clip_ratio/low_mean": 0.0002845019428150408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008480289925500983, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 568.0357666015625, "completions/mean_terminated_length": 507.9682312011719, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.1493146689997085, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 195681259.0, "reward": 0.6205357313156128, "reward_std": 0.2012576162815094, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 337 }, { "clip_ratio/high_max": 0.001428396155461087, "clip_ratio/high_mean": 0.0004483592942960968, "clip_ratio/low_mean": 0.0003310731822239177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007794324692440568, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3295.0, "completions/mean_length": 707.8125610351562, "completions/mean_terminated_length": 618.5475463867188, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 3.15864683581219, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 196335835.0, "reward": 0.4386160969734192, "reward_std": 0.1998247504234314, "rewards/verify_math_reward/mean": 0.4386160671710968, "rewards/verify_math_reward/std": 0.496494859457016, "step": 338 }, { "clip_ratio/high_max": 0.0014731534920429112, "clip_ratio/high_mean": 0.0004133693425956153, "clip_ratio/low_mean": 0.0002422568936708558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006556262449066708, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 638.1674194335938, "completions/mean_terminated_length": 591.228515625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.167979002624672, "grad_norm": 0.11181640625, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 196951009.0, "reward": 0.5491071939468384, "reward_std": 0.19087491929531097, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 339 }, { "clip_ratio/high_max": 0.0015800321816641372, "clip_ratio/high_mean": 0.0004552013997454196, "clip_ratio/low_mean": 0.00030304992742458126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007582513308079797, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2668.0, "completions/mean_length": 634.8002319335938, "completions/mean_terminated_length": 563.8417358398438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.1773111694371536, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 197530958.0, "reward": 0.5870535969734192, "reward_std": 0.21887320280075073, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 340 }, { "clip_ratio/high_max": 0.0016613913776382105, "clip_ratio/high_mean": 0.0005399099186433887, "clip_ratio/low_mean": 0.00029779120700368367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008377011199627304, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2085.0, "completions/mean_length": 605.421875, "completions/mean_terminated_length": 554.0316772460938, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 3.1866433362496354, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0059, "num_tokens": 198102840.0, "reward": 0.5613839626312256, "reward_std": 0.2239074409008026, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 341 }, { "clip_ratio/high_max": 0.0015391380329674575, "clip_ratio/high_mean": 0.00045375877505193785, "clip_ratio/low_mean": 0.0003501511055219453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008039098701146941, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 662.763427734375, "completions/mean_terminated_length": 588.3831176757812, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 3.195975503062117, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 198712324.0, "reward": 0.4765625298023224, "reward_std": 0.2266170084476471, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 342 }, { "clip_ratio/high_max": 0.0016844553128976258, "clip_ratio/high_mean": 0.0005326514890953149, "clip_ratio/low_mean": 0.0004135742810831289, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000946225770348974, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3723.0, "completions/mean_length": 651.4006958007812, "completions/mean_terminated_length": 564.6944580078125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.205307669874599, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 199291971.0, "reward": 0.5725446939468384, "reward_std": 0.2266169786453247, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 343 }, { "clip_ratio/high_max": 0.0014262949243857292, "clip_ratio/high_mean": 0.0004605766582699289, "clip_ratio/low_mean": 0.0003218647548237641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007824414187780349, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 605.4564819335938, "completions/mean_terminated_length": 570.0394287109375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.214639836687081, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 199886156.0, "reward": 0.4799107313156128, "reward_std": 0.22515344619750977, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998752772808075, "step": 344 }, { "clip_ratio/high_max": 0.0017159509225166403, "clip_ratio/high_mean": 0.00045555809583675, "clip_ratio/low_mean": 0.00033502772180327156, "clip_ratio/low_min": 1.3053467228019144e-05, "clip_ratio/region_mean": 0.0007905858237791108, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3325.0, "completions/mean_length": 657.5770263671875, "completions/mean_terminated_length": 610.901611328125, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 3.2239720034995627, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 200520977.0, "reward": 0.4955357313156128, "reward_std": 0.2032090127468109, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 345 }, { "clip_ratio/high_max": 0.0014997234084148658, "clip_ratio/high_mean": 0.00043576945154200075, "clip_ratio/low_mean": 0.0002325831610505702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006683526182769128, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 603.0859375, "completions/mean_terminated_length": 547.6428833007812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.2333041703120444, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 201087950.0, "reward": 0.625, "reward_std": 0.18663331866264343, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 346 }, { "clip_ratio/high_max": 0.0015245356153172906, "clip_ratio/high_mean": 0.0005462753662186515, "clip_ratio/low_mean": 0.0003204307129180961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008667060842526553, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 591.3772583007812, "completions/mean_terminated_length": 547.8169555664062, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 3.242636337124526, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 201664000.0, "reward": 0.5502232313156128, "reward_std": 0.21154901385307312, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 347 }, { "clip_ratio/high_max": 0.0017170511309814174, "clip_ratio/high_mean": 0.0005302536926592438, "clip_ratio/low_mean": 0.00031125928774144995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008415129768764018, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3363.0, "completions/mean_length": 576.482177734375, "completions/mean_terminated_length": 536.7584838867188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 3.251968503937008, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 202230768.0, "reward": 0.5535714626312256, "reward_std": 0.2273990511894226, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 348 }, { "clip_ratio/high_max": 0.0014262903450799058, "clip_ratio/high_mean": 0.0004359676765943732, "clip_ratio/low_mean": 0.0002852419337386891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007212096061266493, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 720.9888916015625, "completions/mean_terminated_length": 624.1170654296875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 3.2613006707494896, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 202869222.0, "reward": 0.424107164144516, "reward_std": 0.21331927180290222, "rewards/verify_math_reward/mean": 0.4241071343421936, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 349 }, { "clip_ratio/high_max": 0.0014594297417716007, "clip_ratio/high_mean": 0.0003818393024062061, "clip_ratio/low_mean": 0.00033925501043086115, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007210943153950211, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3572.0, "completions/mean_length": 605.5770263671875, "completions/mean_terminated_length": 562.1932373046875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 3.2706328375619713, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 203459227.0, "reward": 0.5066964626312256, "reward_std": 0.21271197497844696, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 350 }, { "clip_ratio/high_max": 0.0013140371429471998, "clip_ratio/high_mean": 0.0003632106477198249, "clip_ratio/low_mean": 0.0003321894366763445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006954000928089954, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 612.2332763671875, "completions/mean_terminated_length": 560.943359375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.279965004374453, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 204041196.0, "reward": 0.5, "reward_std": 0.1934378445148468, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 351 }, { "clip_ratio/high_max": 0.0015405017084049177, "clip_ratio/high_mean": 0.0004661469188249612, "clip_ratio/low_mean": 0.00035996260032788996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008261095308625954, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 703.1707763671875, "completions/mean_terminated_length": 629.6658935546875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 3.289297171186935, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 204688061.0, "reward": 0.5234375, "reward_std": 0.22301450371742249, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 352 }, { "clip_ratio/high_max": 0.001756315132297459, "clip_ratio/high_mean": 0.0005779319496923563, "clip_ratio/low_mean": 0.0003970873019625287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009750192630235688, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 614.4296875, "completions/mean_terminated_length": 547.0955200195312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 3.298629337999417, "grad_norm": 0.15234375, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 205262478.0, "reward": 0.5569196939468384, "reward_std": 0.23830027878284454, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 353 }, { "clip_ratio/high_max": 0.0017022573820213438, "clip_ratio/high_mean": 0.0005431572393490569, "clip_ratio/low_mean": 0.0003468133288606623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008899705635485589, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 645.8560791015625, "completions/mean_terminated_length": 587.113525390625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.3079615048118987, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 205870645.0, "reward": 0.4966517984867096, "reward_std": 0.2380392700433731, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 354 }, { "clip_ratio/high_max": 0.0016648589744363562, "clip_ratio/high_mean": 0.00046666631124026026, "clip_ratio/low_mean": 0.0002961184809464612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007627847935509635, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 532.0748291015625, "completions/mean_terminated_length": 495.9131774902344, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 3.3172936716243804, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 206387888.0, "reward": 0.6350446939468384, "reward_std": 0.19020302593708038, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 355 }, { "clip_ratio/high_max": 0.0017283953184232814, "clip_ratio/high_mean": 0.0005860750586634822, "clip_ratio/low_mean": 0.0003859317884007396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009720068519527558, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3219.0, "completions/mean_length": 619.8225708007812, "completions/mean_terminated_length": 568.6444091796875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.326625838436862, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 206980761.0, "reward": 0.5803571939468384, "reward_std": 0.24964657425880432, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 356 }, { "clip_ratio/high_max": 0.0013587948415079154, "clip_ratio/high_mean": 0.0004088918583420309, "clip_ratio/low_mean": 0.00033309950413240585, "clip_ratio/low_min": 2.6799684746947605e-05, "clip_ratio/region_mean": 0.0007419913554258528, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 653.4263916015625, "completions/mean_terminated_length": 602.742919921875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 3.335958005249344, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 207610087.0, "reward": 0.4988839626312256, "reward_std": 0.2051931917667389, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 357 }, { "clip_ratio/high_max": 0.0013561228170146933, "clip_ratio/high_mean": 0.0004154040818775684, "clip_ratio/low_mean": 0.00034203096652163367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007574350465802127, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3759.0, "completions/mean_length": 605.6194458007812, "completions/mean_terminated_length": 546.1918334960938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 3.3452901720618256, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 208175050.0, "reward": 0.5725446939468384, "reward_std": 0.21132118999958038, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 358 }, { "clip_ratio/high_max": 0.0013102528355375398, "clip_ratio/high_mean": 0.0004396332717533369, "clip_ratio/low_mean": 0.00024482973651629436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006844630124760442, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 674.3873291015625, "completions/mean_terminated_length": 592.2685546875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 3.3546223388743073, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 208788005.0, "reward": 0.5290178656578064, "reward_std": 0.203317791223526, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 359 }, { "clip_ratio/high_max": 0.0014575891673302976, "clip_ratio/high_mean": 0.000394679750229443, "clip_ratio/low_mean": 0.00023881431718564272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006334940627539254, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 614.2355346679688, "completions/mean_terminated_length": 558.9694213867188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.363954505686789, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 209371928.0, "reward": 0.5658482313156128, "reward_std": 0.1863730102777481, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 360 }, { "clip_ratio/high_max": 0.0019375069632587838, "clip_ratio/high_mean": 0.000640851886373639, "clip_ratio/low_mean": 0.0002994591994820439, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009403110825587646, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 596.9989013671875, "completions/mean_terminated_length": 529.32763671875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 3.3732866724992707, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 209937927.0, "reward": 0.5401785969734192, "reward_std": 0.21286281943321228, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 361 }, { "clip_ratio/high_max": 0.0016875027740752557, "clip_ratio/high_mean": 0.0005038644831074635, "clip_ratio/low_mean": 0.0003414921168314322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008453565860691015, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 591.1004638671875, "completions/mean_terminated_length": 531.4256591796875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.382618839311753, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 210492265.0, "reward": 0.5535714626312256, "reward_std": 0.23942752182483673, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 362 }, { "clip_ratio/high_max": 0.001530025368083443, "clip_ratio/high_mean": 0.00047298084245994687, "clip_ratio/low_mean": 0.00033506173508612846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008080425795924384, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3205.0, "completions/mean_length": 608.78125, "completions/mean_terminated_length": 573.39794921875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 3.3919510061242346, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 211090965.0, "reward": 0.4854910969734192, "reward_std": 0.20549741387367249, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 363 }, { "clip_ratio/high_max": 0.0013068964781268733, "clip_ratio/high_mean": 0.00035726715623241034, "clip_ratio/low_mean": 0.0003496256874768733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007068928348417103, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 575.950927734375, "completions/mean_terminated_length": 532.1988525390625, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 3.4012831729367163, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 211659809.0, "reward": 0.5267857313156128, "reward_std": 0.20223219692707062, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 364 }, { "clip_ratio/high_max": 0.0015464695907212445, "clip_ratio/high_mean": 0.000393204986494311, "clip_ratio/low_mean": 0.00035978218443233345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007529871804763388, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 605.3080444335938, "completions/mean_terminated_length": 549.9002075195312, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 3.410615339749198, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 212229781.0, "reward": 0.5390625, "reward_std": 0.2008771300315857, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 365 }, { "clip_ratio/high_max": 0.0016385521175834583, "clip_ratio/high_mean": 0.0005387498508753197, "clip_ratio/low_mean": 0.0003505492582007719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008892991136235651, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3251.0, "completions/mean_length": 626.3426513671875, "completions/mean_terminated_length": 591.1375122070312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.41994750656168, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 212856120.0, "reward": 0.5022321939468384, "reward_std": 0.2511875033378601, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 366 }, { "clip_ratio/high_max": 0.0014653511052529211, "clip_ratio/high_mean": 0.00045386035731098673, "clip_ratio/low_mean": 0.0003321712921433573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007860316327423789, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 626.8504638671875, "completions/mean_terminated_length": 575.7757568359375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.4292796733741615, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 213462738.0, "reward": 0.5290178656578064, "reward_std": 0.22079278528690338, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 367 }, { "clip_ratio/high_max": 0.002329958844711655, "clip_ratio/high_mean": 0.0008108197534966166, "clip_ratio/low_mean": 0.00036530438001136645, "clip_ratio/low_min": 1.1512249329825863e-05, "clip_ratio/region_mean": 0.0011761241366912145, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 538.3538208007812, "completions/mean_terminated_length": 514.3696899414062, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.4386118401866432, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 214004607.0, "reward": 0.6439732313156128, "reward_std": 0.2364267110824585, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 368 }, { "clip_ratio/high_max": 0.0015956845563778188, "clip_ratio/high_mean": 0.0004438259143171308, "clip_ratio/low_mean": 0.00036947598960068717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008133019109664019, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 623.9017944335938, "completions/mean_terminated_length": 540.5714111328125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.447944006999125, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 214577959.0, "reward": 0.5011160969734192, "reward_std": 0.22281017899513245, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002780556678772, "step": 369 }, { "clip_ratio/high_max": 0.001967671241800417, "clip_ratio/high_mean": 0.0005917860780755291, "clip_ratio/low_mean": 0.0002516439379860458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008434300088993041, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 629.9620971679688, "completions/mean_terminated_length": 582.9118041992188, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 3.457276173811607, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 215179117.0, "reward": 0.5223214626312256, "reward_std": 0.2123001217842102, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 370 }, { "clip_ratio/high_max": 0.001820850586227607, "clip_ratio/high_mean": 0.0005464704081532545, "clip_ratio/low_mean": 0.00031654952817916637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008630199354229262, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2399.0, "completions/mean_length": 567.0625, "completions/mean_terminated_length": 515.1076049804688, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 3.466608340624089, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 215719165.0, "reward": 0.6037946939468384, "reward_std": 0.22116298973560333, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 371 }, { "clip_ratio/high_max": 0.0015019711145214387, "clip_ratio/high_mean": 0.0004210207941923727, "clip_ratio/low_mean": 0.0004358102618198245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008568310363443743, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 615.7645263671875, "completions/mean_terminated_length": 556.5097045898438, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 3.4759405074365706, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 216305442.0, "reward": 0.5290178656578064, "reward_std": 0.22484782338142395, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 372 }, { "clip_ratio/high_max": 0.0015328749641412287, "clip_ratio/high_mean": 0.0004805345110980852, "clip_ratio/low_mean": 0.00033163407965730585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008121686014419538, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 620.708740234375, "completions/mean_terminated_length": 553.4959716796875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.4852726742490523, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 216881333.0, "reward": 0.5502232313156128, "reward_std": 0.2137625515460968, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 373 }, { "clip_ratio/high_max": 0.0015800654764461797, "clip_ratio/high_mean": 0.00048514291279389, "clip_ratio/low_mean": 0.0002821739765295206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007673168915971473, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3587.0, "completions/mean_length": 604.5245971679688, "completions/mean_terminated_length": 545.078369140625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.494604841061534, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 217459131.0, "reward": 0.5234375, "reward_std": 0.20031329989433289, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 374 }, { "clip_ratio/high_max": 0.00146641343144438, "clip_ratio/high_mean": 0.0004182629563729279, "clip_ratio/low_mean": 0.0002642707102040731, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006825336649853853, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 590.1908569335938, "completions/mean_terminated_length": 558.6069946289062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.5039370078740157, "grad_norm": 0.1064453125, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 218033150.0, "reward": 0.5078125, "reward_std": 0.1702958196401596, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 375 }, { "clip_ratio/high_max": 0.0013207476849856903, "clip_ratio/high_mean": 0.00044950790788789163, "clip_ratio/low_mean": 0.00037110483003743866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008206127286030096, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 641.2890625, "completions/mean_terminated_length": 602.296875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 3.5132691746864975, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 218655201.0, "reward": 0.5535714626312256, "reward_std": 0.24956989288330078, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994493484497, "step": 376 }, { "clip_ratio/high_max": 0.0015803857331775362, "clip_ratio/high_mean": 0.0004891510018296685, "clip_ratio/low_mean": 0.0003099080593074177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007990590806912223, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3329.0, "completions/mean_length": 662.146240234375, "completions/mean_terminated_length": 579.7337036132812, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 3.522601341498979, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 219258764.0, "reward": 0.5301339626312256, "reward_std": 0.2094796746969223, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 377 }, { "clip_ratio/high_max": 0.0017390277644153684, "clip_ratio/high_mean": 0.0004640953416128468, "clip_ratio/low_mean": 0.00038881219870745554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00085290753213485, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 633.2020263671875, "completions/mean_terminated_length": 566.23095703125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 3.531933508311461, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 219851337.0, "reward": 0.486607164144516, "reward_std": 0.2292456030845642, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 378 }, { "clip_ratio/high_max": 0.0017334263684460893, "clip_ratio/high_mean": 0.0005789322231066762, "clip_ratio/low_mean": 0.0003493214683203405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009282536930186325, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 622.6808471679688, "completions/mean_terminated_length": 559.529541015625, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 3.5412656751239426, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 220428363.0, "reward": 0.5625, "reward_std": 0.2352944165468216, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 379 }, { "clip_ratio/high_max": 0.0017076302010536892, "clip_ratio/high_mean": 0.0005019646705477498, "clip_ratio/low_mean": 0.00030041423542570556, "clip_ratio/low_min": 2.3854961909819394e-05, "clip_ratio/region_mean": 0.0008023789068829501, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 595.015625, "completions/mean_terminated_length": 555.5011596679688, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 3.5505978419364244, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 221020617.0, "reward": 0.5368303656578064, "reward_std": 0.22045326232910156, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 380 }, { "clip_ratio/high_max": 0.0017493419800302945, "clip_ratio/high_mean": 0.0005218662131483143, "clip_ratio/low_mean": 0.000359154598868372, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000881020800989063, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 558.4017944335938, "completions/mean_terminated_length": 530.5466918945312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 3.5599300087489065, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0239, "num_tokens": 221575505.0, "reward": 0.5915178656578064, "reward_std": 0.24036367237567902, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 381 }, { "clip_ratio/high_max": 0.0020407741503731813, "clip_ratio/high_mean": 0.000637064392776665, "clip_ratio/low_mean": 0.0004148168359279225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001051881230523577, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2145.0, "completions/mean_length": 565.5245971679688, "completions/mean_terminated_length": 537.7255859375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 3.5692621755613883, "grad_norm": 0.19921875, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 222144375.0, "reward": 0.5412946939468384, "reward_std": 0.257803350687027, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 382 }, { "clip_ratio/high_max": 0.0013758735576629988, "clip_ratio/high_mean": 0.0004422849675620455, "clip_ratio/low_mean": 0.00032788527369120857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007701702406848199, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 594.3627319335938, "completions/mean_terminated_length": 534.7434692382812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 3.57859434237387, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0094, "num_tokens": 222711492.0, "reward": 0.5345982313156128, "reward_std": 0.22773607075214386, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 383 }, { "clip_ratio/high_max": 0.0015417123049701331, "clip_ratio/high_mean": 0.0005001279358793909, "clip_ratio/low_mean": 0.00032574273166119383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008258706702690688, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 587.8560791015625, "completions/mean_terminated_length": 528.1260375976562, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 3.5879265091863517, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 223270291.0, "reward": 0.5502232313156128, "reward_std": 0.24626091122627258, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 384 }, { "clip_ratio/high_max": 0.0015141884978220332, "clip_ratio/high_mean": 0.00044295220095591503, "clip_ratio/low_mean": 0.00036933203512035107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008122842345983372, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 574.6607666015625, "completions/mean_terminated_length": 510.6363525390625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 3.5972586759988334, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 223812923.0, "reward": 0.5412946939468384, "reward_std": 0.1931735873222351, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 385 }, { "clip_ratio/high_max": 0.0015524083719355986, "clip_ratio/high_mean": 0.0005091902276035398, "clip_ratio/low_mean": 0.0003068782958735028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008160685219991137, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3821.0, "completions/mean_length": 616.1517944335938, "completions/mean_terminated_length": 572.8994750976562, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 3.606590842811315, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 224403227.0, "reward": 0.515625, "reward_std": 0.22012129426002502, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 386 }, { "clip_ratio/high_max": 0.0015967811632435769, "clip_ratio/high_mean": 0.000492642038352642, "clip_ratio/low_mean": 0.00031995178494526044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008125938293233048, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 624.2824096679688, "completions/mean_terminated_length": 569.1757202148438, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 3.615923009623797, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 224995624.0, "reward": 0.5546875, "reward_std": 0.21925367414951324, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 387 }, { "clip_ratio/high_max": 0.0016285891024381272, "clip_ratio/high_mean": 0.0005083081557586411, "clip_ratio/low_mean": 0.00033271119912114955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008410193522649934, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 615.1964721679688, "completions/mean_terminated_length": 555.9319458007812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.625255176436279, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 225574736.0, "reward": 0.5234375, "reward_std": 0.23361219465732574, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 388 }, { "clip_ratio/high_max": 0.0016358569091607933, "clip_ratio/high_mean": 0.000530800161186562, "clip_ratio/low_mean": 0.0003404246615446027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008712248186384386, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3656.0, "completions/mean_length": 539.0245971679688, "completions/mean_terminated_length": 498.87811279296875, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 3.6345873432487608, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.014, "num_tokens": 226106062.0, "reward": 0.5926339626312256, "reward_std": 0.22236622869968414, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 389 }, { "clip_ratio/high_max": 0.001320096257586556, "clip_ratio/high_mean": 0.0004383282646358566, "clip_ratio/low_mean": 0.0002619372946810472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007002655456744833, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1914.0, "completions/mean_length": 628.404052734375, "completions/mean_terminated_length": 573.36279296875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.6439195100612425, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 226692000.0, "reward": 0.4754464626312256, "reward_std": 0.21357779204845428, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 390 }, { "clip_ratio/high_max": 0.0016108330692077288, "clip_ratio/high_mean": 0.0004955009296736534, "clip_ratio/low_mean": 0.00030426281182371895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007997637512744404, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 626.8326416015625, "completions/mean_terminated_length": 551.6738891601562, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 3.653251676873724, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 227262362.0, "reward": 0.5870535969734192, "reward_std": 0.2295461893081665, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 391 }, { "clip_ratio/high_max": 0.0015622963219357189, "clip_ratio/high_mean": 0.000444982869339583, "clip_ratio/low_mean": 0.00027761796604863775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00072260084152731, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2569.0, "completions/mean_length": 629.1897583007812, "completions/mean_terminated_length": 570.1634521484375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 3.662583843686206, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0143, "num_tokens": 227863660.0, "reward": 0.5133928656578064, "reward_std": 0.18840178847312927, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 392 }, { "clip_ratio/high_max": 0.001648791678235284, "clip_ratio/high_mean": 0.00042685270636866335, "clip_ratio/low_mean": 0.00025807614952100266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006849288547527976, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 567.2902221679688, "completions/mean_terminated_length": 527.4627685546875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.6719160104986877, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 228426176.0, "reward": 0.621651828289032, "reward_std": 0.19114413857460022, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 393 }, { "clip_ratio/high_max": 0.0016169582395377802, "clip_ratio/high_mean": 0.0004791138062500977, "clip_ratio/low_mean": 0.00034066148032252386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000819775290437974, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 626.880615234375, "completions/mean_terminated_length": 575.8063354492188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.6812481773111694, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 229027141.0, "reward": 0.5379464626312256, "reward_std": 0.21989238262176514, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364279270172, "step": 394 }, { "clip_ratio/high_max": 0.0017965815895877313, "clip_ratio/high_mean": 0.00053020203131382, "clip_ratio/low_mean": 0.000366269854453094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008964718890638324, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 607.7232666015625, "completions/mean_terminated_length": 560.37109375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 3.690580344123651, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0107, "num_tokens": 229606733.0, "reward": 0.5133928656578064, "reward_std": 0.20493288338184357, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 395 }, { "clip_ratio/high_max": 0.0014480613535852171, "clip_ratio/high_mean": 0.0004475641899261973, "clip_ratio/low_mean": 0.00039229245521710254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008398566287723952, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 585.700927734375, "completions/mean_terminated_length": 534.0203857421875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.699912510936133, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 230167217.0, "reward": 0.566964328289032, "reward_std": 0.2270306646823883, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 396 }, { "clip_ratio/high_max": 0.0014812636945862323, "clip_ratio/high_mean": 0.00048173480990953976, "clip_ratio/low_mean": 0.0003426109453812387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008243457532444154, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2302.0, "completions/mean_length": 560.0045166015625, "completions/mean_terminated_length": 516.0542602539062, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 3.7092446777486145, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 230710285.0, "reward": 0.578125, "reward_std": 0.22939532995224, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 397 }, { "clip_ratio/high_max": 0.0014677215967822121, "clip_ratio/high_mean": 0.0004499109904827492, "clip_ratio/low_mean": 0.00034677758276302484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007966885686983005, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 656.2310791015625, "completions/mean_terminated_length": 577.6974487304688, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 3.7185768445610963, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 231306964.0, "reward": 0.494419664144516, "reward_std": 0.22244179248809814, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 398 }, { "clip_ratio/high_max": 0.001415038406776148, "clip_ratio/high_mean": 0.0004363909156381851, "clip_ratio/low_mean": 0.00031491438573993946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007513053005823167, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 545.9955444335938, "completions/mean_terminated_length": 509.97515869140625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 3.7279090113735784, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 231839504.0, "reward": 0.6261160969734192, "reward_std": 0.19663412868976593, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 399 }, { "clip_ratio/high_max": 0.0020534365648927633, "clip_ratio/high_mean": 0.0006376208657457028, "clip_ratio/low_mean": 0.00036247534490030375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010000962010963121, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3426.0, "completions/mean_length": 612.484375, "completions/mean_terminated_length": 577.138671875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.73724117818606, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 232431410.0, "reward": 0.5814732313156128, "reward_std": 0.2620042860507965, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935930073261261, "step": 400 }, { "clip_ratio/high_max": 0.0014936348379706033, "clip_ratio/high_mean": 0.0003989423350958532, "clip_ratio/low_mean": 0.0002663618034830506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006653041418758221, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3471.0, "completions/mean_length": 607.7154541015625, "completions/mean_terminated_length": 560.3631591796875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.746573344998542, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 233008435.0, "reward": 0.5959821939468384, "reward_std": 0.21324008703231812, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 401 }, { "clip_ratio/high_max": 0.0015859054765314795, "clip_ratio/high_mean": 0.0004815630691155093, "clip_ratio/low_mean": 0.0003511838276608614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008327468967763707, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 609.8292846679688, "completions/mean_terminated_length": 578.4223022460938, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.7559055118110236, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 233607274.0, "reward": 0.5691964626312256, "reward_std": 0.21654202044010162, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 402 }, { "clip_ratio/high_max": 0.001635469800021383, "clip_ratio/high_mean": 0.00048614210186315177, "clip_ratio/low_mean": 0.0004072176566296548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000893359760993917, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3190.0, "completions/mean_length": 589.989990234375, "completions/mean_terminated_length": 550.4187622070312, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 3.7652376786235053, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0123, "num_tokens": 234175689.0, "reward": 0.5334821939468384, "reward_std": 0.24494822323322296, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 403 }, { "clip_ratio/high_max": 0.0014582817402697401, "clip_ratio/high_mean": 0.00039351377563434653, "clip_ratio/low_mean": 0.0002876310138617555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006811447938162019, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3502.0, "completions/mean_length": 620.8973388671875, "completions/mean_terminated_length": 565.7369995117188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.774569845435987, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": -0.0116, "num_tokens": 234759253.0, "reward": 0.6004464626312256, "reward_std": 0.18690545856952667, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 404 }, { "clip_ratio/high_max": 0.0019488130801619263, "clip_ratio/high_mean": 0.0006539549137869471, "clip_ratio/low_mean": 0.00035105997562823177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010050149021481047, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 625.8449096679688, "completions/mean_terminated_length": 566.7616577148438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 3.783902012248469, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 235359882.0, "reward": 0.566964328289032, "reward_std": 0.23953881859779358, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 405 }, { "clip_ratio/high_max": 0.0019392261774555664, "clip_ratio/high_mean": 0.0005943076939729508, "clip_ratio/low_mean": 0.0002923669904930648, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000886674691173539, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 620.9442138671875, "completions/mean_terminated_length": 549.7015991210938, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 3.793234179060951, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 235922384.0, "reward": 0.5323660969734192, "reward_std": 0.2530961036682129, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 406 }, { "clip_ratio/high_max": 0.001731750751787331, "clip_ratio/high_mean": 0.0006225847790801708, "clip_ratio/low_mean": 0.00038206763542802946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010046524057543138, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 601.0435791015625, "completions/mean_terminated_length": 545.5680541992188, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.8025663458734327, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 236485407.0, "reward": 0.5837053656578064, "reward_std": 0.24588504433631897, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 407 }, { "clip_ratio/high_max": 0.0017494614930910757, "clip_ratio/high_mean": 0.0005477968547893397, "clip_ratio/low_mean": 0.00029243565404613037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000840232525661122, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3526.0, "completions/mean_length": 553.638427734375, "completions/mean_terminated_length": 525.7457885742188, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 3.8118985126859144, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 237031867.0, "reward": 0.5770089626312256, "reward_std": 0.21098490059375763, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 408 }, { "clip_ratio/high_max": 0.001624859107323573, "clip_ratio/high_mean": 0.00047867421790215303, "clip_ratio/low_mean": 0.0002843144603730252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000762988667247555, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 675.4029541015625, "completions/mean_terminated_length": 621.1077270507812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.821230679498396, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 237666148.0, "reward": 0.4821428656578064, "reward_std": 0.22394628822803497, "rewards/verify_math_reward/mean": 0.4821428656578064, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 409 }, { "clip_ratio/high_max": 0.0013347278263609041, "clip_ratio/high_mean": 0.0003814470600218556, "clip_ratio/low_mean": 0.00021836383575646323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005998109022584686, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 638.505615234375, "completions/mean_terminated_length": 587.6024780273438, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 3.830562846310878, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 238276521.0, "reward": 0.5078125, "reward_std": 0.17033155262470245, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 410 }, { "clip_ratio/high_max": 0.0016647904694764293, "clip_ratio/high_mean": 0.00046309007188938267, "clip_ratio/low_mean": 0.0002389593830685044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007020494558673818, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 591.6506958007812, "completions/mean_terminated_length": 540.0577392578125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 3.8398950131233596, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 238838208.0, "reward": 0.566964328289032, "reward_std": 0.18570081889629364, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 411 }, { "clip_ratio/high_max": 0.001612948665751901, "clip_ratio/high_mean": 0.0005077806624740333, "clip_ratio/low_mean": 0.000302331301668346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008101119651655608, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 601.8460083007812, "completions/mean_terminated_length": 546.3832397460938, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 3.8492271799358413, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 239404694.0, "reward": 0.5613839626312256, "reward_std": 0.2007237821817398, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 412 }, { "clip_ratio/high_max": 0.0017465831315348623, "clip_ratio/high_mean": 0.0005380814523050503, "clip_ratio/low_mean": 0.00022690568289363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007649871331523173, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 529.2221069335938, "completions/mean_terminated_length": 501.13726806640625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.858559346748323, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 239929485.0, "reward": 0.6540178656578064, "reward_std": 0.2108679562807083, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 413 }, { "clip_ratio/high_max": 0.0016045059946918627, "clip_ratio/high_mean": 0.00044197996794537175, "clip_ratio/low_mean": 0.00037812140465121047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008201013843063265, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 589.1328125, "completions/mean_terminated_length": 565.4910278320312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 3.8678915135608047, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 240528444.0, "reward": 0.574776828289032, "reward_std": 0.22913500666618347, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 414 }, { "clip_ratio/high_max": 0.0019203094470867654, "clip_ratio/high_mean": 0.0005664961618094821, "clip_ratio/low_mean": 0.0003205215285788654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000887017688000924, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 554.265625, "completions/mean_terminated_length": 506.18780517578125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 3.8772236803732865, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 241059874.0, "reward": 0.629464328289032, "reward_std": 0.22195394337177277, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 415 }, { "clip_ratio/high_max": 0.0016401830325776245, "clip_ratio/high_mean": 0.0004495620095212871, "clip_ratio/low_mean": 0.0003676475903375831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000817209598608315, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 658.3069458007812, "completions/mean_terminated_length": 587.830322265625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 3.886555847185768, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 241672093.0, "reward": 0.5066964626312256, "reward_std": 0.21293838322162628, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 416 }, { "clip_ratio/high_max": 0.0015579139508190565, "clip_ratio/high_mean": 0.0003916371828154297, "clip_ratio/low_mean": 0.0003195531555775233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007111903305485612, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 632.763427734375, "completions/mean_terminated_length": 557.733154296875, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 3.8958880139982504, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 242254433.0, "reward": 0.543526828289032, "reward_std": 0.19971348345279694, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 417 }, { "clip_ratio/high_max": 0.001751403093294357, "clip_ratio/high_mean": 0.0005641086927425931, "clip_ratio/low_mean": 0.00022720499475781253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007913136905699503, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2345.0, "completions/mean_length": 567.3192138671875, "completions/mean_terminated_length": 527.4921264648438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.905220180810732, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 242813303.0, "reward": 0.5792410969734192, "reward_std": 0.21053095161914825, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 418 }, { "clip_ratio/high_max": 0.0013893151572119677, "clip_ratio/high_mean": 0.0004139341785958095, "clip_ratio/low_mean": 0.0003862174690993925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008001516480362625, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 672.1395263671875, "completions/mean_terminated_length": 593.9691772460938, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 3.914552347623214, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0163, "num_tokens": 243418828.0, "reward": 0.5245535969734192, "reward_std": 0.24765029549598694, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 419 }, { "clip_ratio/high_max": 0.001797820483261603, "clip_ratio/high_mean": 0.000599380720359477, "clip_ratio/low_mean": 0.0003531840602590819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009525647956252214, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 582.4788208007812, "completions/mean_terminated_length": 518.5965576171875, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 3.9238845144356955, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 243963105.0, "reward": 0.5926339626312256, "reward_std": 0.24521991610527039, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 420 }, { "clip_ratio/high_max": 0.0016470877526444383, "clip_ratio/high_mean": 0.0004988932628293696, "clip_ratio/low_mean": 0.000288941778762819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007878350324972416, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 646.966552734375, "completions/mean_terminated_length": 580.2616577148438, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 3.9332166812481772, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 244566419.0, "reward": 0.5145089626312256, "reward_std": 0.2191769778728485, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 421 }, { "clip_ratio/high_max": 0.0013836159505444812, "clip_ratio/high_mean": 0.00041446368686592905, "clip_ratio/low_mean": 0.00038482563741126796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007992893088157871, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 617.4810791015625, "completions/mean_terminated_length": 570.2613525390625, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 3.942548848060659, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 245156538.0, "reward": 0.543526828289032, "reward_std": 0.2271055281162262, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 422 }, { "clip_ratio/high_max": 0.0016540226624783827, "clip_ratio/high_mean": 0.0005068623531769845, "clip_ratio/low_mean": 0.00024002239706533146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007468847365998954, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 639.5814819335938, "completions/mean_terminated_length": 552.5777587890625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 3.9518810148731407, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 245730651.0, "reward": 0.5446428656578064, "reward_std": 0.186296746134758, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 423 }, { "clip_ratio/high_max": 0.0014401540483959252, "clip_ratio/high_mean": 0.0004272742631883375, "clip_ratio/low_mean": 0.00035068887564193574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007779631423545652, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 566.4263916015625, "completions/mean_terminated_length": 530.61328125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 3.961213181685623, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 246283817.0, "reward": 0.5803571939468384, "reward_std": 0.24570778012275696, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 424 }, { "clip_ratio/high_max": 0.0015108439911273308, "clip_ratio/high_mean": 0.000489139272644934, "clip_ratio/low_mean": 0.00032226459723005974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000811403877378325, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2270.0, "completions/mean_length": 614.3761596679688, "completions/mean_terminated_length": 567.1142578125, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 3.9705453484981046, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0159, "num_tokens": 246873738.0, "reward": 0.5647321939468384, "reward_std": 0.23112311959266663, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606892466545105, "step": 425 }, { "clip_ratio/high_max": 0.0018837725056073396, "clip_ratio/high_mean": 0.0005286419341246074, "clip_ratio/low_mean": 0.0003960915551033395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009247334883184521, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3416.0, "completions/mean_length": 567.536865234375, "completions/mean_terminated_length": 523.6802368164062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.9798775153105863, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 247427475.0, "reward": 0.5680803656578064, "reward_std": 0.22950975596904755, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200122833252, "step": 426 }, { "clip_ratio/high_max": 0.0016168537194971577, "clip_ratio/high_mean": 0.000455863840670645, "clip_ratio/low_mean": 0.0003151222116457575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007709860510658473, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2253.0, "completions/mean_length": 644.3873291015625, "completions/mean_terminated_length": 545.31689453125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.989209682123068, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 247992814.0, "reward": 0.5837053656578064, "reward_std": 0.21294020116329193, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 427 }, { "clip_ratio/high_max": 0.0016863984574229107, "clip_ratio/high_mean": 0.0005149487785729434, "clip_ratio/low_mean": 0.00029305350585673295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008080022853391711, "completions/clipped_ratio": 0.011363636363636354, "completions/max_length": 4096.0, "completions/max_terminated_length": 2076.0, "completions/mean_length": 554.9915161132812, "completions/mean_terminated_length": 514.2902221679688, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.9985418489355498, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0154, "num_tokens": 248560892.0, "reward": 0.5948660969734192, "reward_std": 0.20238234102725983, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 428 }, { "clip_ratio/high_max": 0.0017036622812156565, "clip_ratio/high_mean": 0.000501368228924548, "clip_ratio/low_mean": 0.00033213314895874646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008335013781106682, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 559.75, "completions/mean_terminated_length": 543.8923950195312, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 4.009332166812482, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 249140140.0, "reward": 0.5792410969734192, "reward_std": 0.21214744448661804, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 429 }, { "clip_ratio/high_max": 0.0016583693195570959, "clip_ratio/high_mean": 0.00048098201523316675, "clip_ratio/low_mean": 0.00031456732381229813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007955493456393015, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 568.6473388671875, "completions/mean_terminated_length": 548.85302734375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 4.0186643336249634, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 249713904.0, "reward": 0.5535714626312256, "reward_std": 0.21654090285301208, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 430 }, { "clip_ratio/high_max": 0.0015662377345506684, "clip_ratio/high_mean": 0.00041621800278335286, "clip_ratio/low_mean": 0.00026458191166511824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006807999138800369, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 606.9453125, "completions/mean_terminated_length": 567.5654907226562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 4.027996500437445, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 250295159.0, "reward": 0.5535714626312256, "reward_std": 0.19129958748817444, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 431 }, { "clip_ratio/high_max": 0.0017931187894646428, "clip_ratio/high_mean": 0.0005656477014781558, "clip_ratio/low_mean": 0.00038114246308396105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009467901654716115, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 632.5859375, "completions/mean_terminated_length": 573.6174926757812, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 4.037328667249927, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0163, "num_tokens": 250886500.0, "reward": 0.5625, "reward_std": 0.24784713983535767, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 432 }, { "clip_ratio/high_max": 0.0014611402048103628, "clip_ratio/high_mean": 0.0005159039941418087, "clip_ratio/low_mean": 0.0003199606596808735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008358646600754582, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 637.5491333007812, "completions/mean_terminated_length": 574.6681518554688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 4.046660834062409, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 251477264.0, "reward": 0.5401785969734192, "reward_std": 0.2152363657951355, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 433 }, { "clip_ratio/high_max": 0.0018375051686234656, "clip_ratio/high_mean": 0.0005550296677938604, "clip_ratio/low_mean": 0.00039655115983805445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009515808333162568, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 661.5592041015625, "completions/mean_terminated_length": 583.147216796875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 4.05599300087489, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 252087973.0, "reward": 0.504464328289032, "reward_std": 0.281840443611145, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 434 }, { "clip_ratio/high_max": 0.002112425703671761, "clip_ratio/high_mean": 0.0006604397185583366, "clip_ratio/low_mean": 0.00025667889258329524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009171186156891054, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2235.0, "completions/mean_length": 546.5245971679688, "completions/mean_terminated_length": 502.40679931640625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.065325167687372, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 252616603.0, "reward": 0.5837053656578064, "reward_std": 0.208427295088768, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 435 }, { "clip_ratio/high_max": 0.0015524970385740744, "clip_ratio/high_mean": 0.0004217042617256084, "clip_ratio/low_mean": 0.00025636546195073606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000678069733112352, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 630.3504638671875, "completions/mean_terminated_length": 571.3439331054688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 4.074657334499854, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 253210261.0, "reward": 0.5993303656578064, "reward_std": 0.18606990575790405, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 436 }, { "clip_ratio/high_max": 0.0017454762273700908, "clip_ratio/high_mean": 0.0005565919273067266, "clip_ratio/low_mean": 0.00034766365502036933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000904255585737701, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 627.833740234375, "completions/mean_terminated_length": 568.7843627929688, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 4.083989501312336, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 253801016.0, "reward": 0.5011160969734192, "reward_std": 0.23149968683719635, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 437 }, { "clip_ratio/high_max": 0.0015510613402511808, "clip_ratio/high_mean": 0.0004516644767136313, "clip_ratio/low_mean": 0.000284588589465784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007362530659520417, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 638.171875, "completions/mean_terminated_length": 599.1444702148438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.093321668124818, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 254413066.0, "reward": 0.5, "reward_std": 0.226617693901062, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 438 }, { "clip_ratio/high_max": 0.0015267543431036756, "clip_ratio/high_mean": 0.0004476620911191276, "clip_ratio/low_mean": 0.00036250121388547996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008101632975012762, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 663.5089721679688, "completions/mean_terminated_length": 609.0249633789062, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 4.1026538349373, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 255040826.0, "reward": 0.5223214626312256, "reward_std": 0.20516182482242584, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 439 }, { "clip_ratio/high_max": 0.0015841341746636317, "clip_ratio/high_mean": 0.00048040379022040725, "clip_ratio/low_mean": 0.00030345916957230656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007838629612706427, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 652.515625, "completions/mean_terminated_length": 601.8187866210938, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 4.111986001749782, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0137, "num_tokens": 255654320.0, "reward": 0.5078125, "reward_std": 0.20636852085590363, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 440 }, { "clip_ratio/high_max": 0.001609556543371582, "clip_ratio/high_mean": 0.00042774063945216767, "clip_ratio/low_mean": 0.0003944202933325869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008221609241445549, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 633.2734375, "completions/mean_terminated_length": 586.2681274414062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.121318168562263, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 256256301.0, "reward": 0.5558035969734192, "reward_std": 0.2284938097000122, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 441 }, { "clip_ratio/high_max": 0.0014061840456633945, "clip_ratio/high_mean": 0.0004493843003956499, "clip_ratio/low_mean": 0.0002969730242057267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007463573329005158, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2892.0, "completions/mean_length": 641.786865234375, "completions/mean_terminated_length": 586.9580688476562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 4.130650335374745, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 256864294.0, "reward": 0.4921875298023224, "reward_std": 0.2139505296945572, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 442 }, { "clip_ratio/high_max": 0.0016358745269826613, "clip_ratio/high_mean": 0.0004645560889002809, "clip_ratio/low_mean": 0.00026109794521289587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007256540357047925, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3155.0, "completions/mean_length": 637.7902221679688, "completions/mean_terminated_length": 570.9078369140625, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 4.139982502187227, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 257464482.0, "reward": 0.5580357313156128, "reward_std": 0.19016912579536438, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 443 }, { "clip_ratio/high_max": 0.0016762115683377488, "clip_ratio/high_mean": 0.0005131442762831284, "clip_ratio/low_mean": 0.0003506496645968582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008637939467917022, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 593.4765625, "completions/mean_terminated_length": 533.8422241210938, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 4.1493146689997085, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 258031901.0, "reward": 0.5267857313156128, "reward_std": 0.23262692987918854, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 444 }, { "clip_ratio/high_max": 0.0014192719418133493, "clip_ratio/high_mean": 0.0004401346074018875, "clip_ratio/low_mean": 0.0003835150885151961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008236496951212757, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3408.0, "completions/mean_length": 610.4888916015625, "completions/mean_terminated_length": 539.0319213867188, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 4.15864683581219, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 258596179.0, "reward": 0.5758928656578064, "reward_std": 0.22518664598464966, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 445 }, { "clip_ratio/high_max": 0.0015581911557092099, "clip_ratio/high_mean": 0.0004907202585400228, "clip_ratio/low_mean": 0.0003781174611958704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008688377092767041, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2735.0, "completions/mean_length": 594.5167846679688, "completions/mean_terminated_length": 542.9660034179688, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 4.167979002624672, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 259161642.0, "reward": 0.5491071939468384, "reward_std": 0.24577559530735016, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 446 }, { "clip_ratio/high_max": 0.0014534153542626882, "clip_ratio/high_mean": 0.0004460589500467904, "clip_ratio/low_mean": 0.00036021004962094594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008062690130827832, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 657.3158569335938, "completions/mean_terminated_length": 590.8111572265625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.177311169437154, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 259771373.0, "reward": 0.520089328289032, "reward_std": 0.22277876734733582, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 447 }, { "clip_ratio/high_max": 0.0016046481323428452, "clip_ratio/high_mean": 0.00048080523583848844, "clip_ratio/low_mean": 0.00036281801590121177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008436232583335368, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 583.4933471679688, "completions/mean_terminated_length": 563.7822875976562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 4.186643336249635, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 260369807.0, "reward": 0.546875, "reward_std": 0.21391661465168, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 448 }, { "clip_ratio/high_max": 0.001517039590908098, "clip_ratio/high_mean": 0.0004863342280714278, "clip_ratio/low_mean": 0.00039316169159064884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00087949591306824, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3586.0, "completions/mean_length": 651.0714721679688, "completions/mean_terminated_length": 568.3931274414062, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 4.195975503062117, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 260956271.0, "reward": 0.5189732313156128, "reward_std": 0.24201901257038116, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 449 }, { "clip_ratio/high_max": 0.0017466909521317575, "clip_ratio/high_mean": 0.000555154106223199, "clip_ratio/low_mean": 0.00028920324893988436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008443573506156099, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 580.5892944335938, "completions/mean_terminated_length": 544.919921875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 4.205307669874599, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 261527951.0, "reward": 0.5658482313156128, "reward_std": 0.2472045123577118, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 450 }, { "clip_ratio/high_max": 0.0015777427634020569, "clip_ratio/high_mean": 0.00042024009394481254, "clip_ratio/low_mean": 0.00032073673787635926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007409768413708662, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 601.8527221679688, "completions/mean_terminated_length": 570.3739013671875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 4.2146398366870805, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 262125963.0, "reward": 0.5680803656578064, "reward_std": 0.19197037816047668, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 451 }, { "clip_ratio/high_max": 0.0016140327397806686, "clip_ratio/high_mean": 0.0005076759837265854, "clip_ratio/low_mean": 0.00027791280172095867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007855887779442128, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 600.4765625, "completions/mean_terminated_length": 557.0294189453125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.223972003499562, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 262705902.0, "reward": 0.543526828289032, "reward_std": 0.20700766146183014, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 452 }, { "clip_ratio/high_max": 0.0014768202390769147, "clip_ratio/high_mean": 0.0004388366351122386, "clip_ratio/low_mean": 0.00034660389769669564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007854405412217602, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 631.904052734375, "completions/mean_terminated_length": 592.805908203125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 4.233304170312044, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 263325008.0, "reward": 0.4888392984867096, "reward_std": 0.22417522966861725, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 453 }, { "clip_ratio/high_max": 0.0017326985635008896, "clip_ratio/high_mean": 0.0005633963983200374, "clip_ratio/low_mean": 0.00031264214987913874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008760385499044787, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3285.0, "completions/mean_length": 590.2288208007812, "completions/mean_terminated_length": 546.6542358398438, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 4.242636337124526, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 263905565.0, "reward": 0.5502232313156128, "reward_std": 0.24048060178756714, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 454 }, { "clip_ratio/high_max": 0.001486037075665081, "clip_ratio/high_mean": 0.000446635130060713, "clip_ratio/low_mean": 0.00038580304101287766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000832438171528338, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 614.6295166015625, "completions/mean_terminated_length": 579.3054809570312, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 4.251968503937007, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 264504393.0, "reward": 0.5970982313156128, "reward_std": 0.2015165388584137, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 455 }, { "clip_ratio/high_max": 0.0016170304697880056, "clip_ratio/high_mean": 0.00039441224362235516, "clip_ratio/low_mean": 0.00030087512379850523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006952873673071736, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 616.1295166015625, "completions/mean_terminated_length": 540.7388916015625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 4.26130067074949, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 265063453.0, "reward": 0.5558035969734192, "reward_std": 0.18727383017539978, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 456 }, { "clip_ratio/high_max": 0.0014758168308617314, "clip_ratio/high_mean": 0.0004300506926711023, "clip_ratio/low_mean": 0.0003016561261119932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000731706818442035, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 639.622802734375, "completions/mean_terminated_length": 548.561279296875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 4.270632837561972, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 265627979.0, "reward": 0.5234375, "reward_std": 0.20357809960842133, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 457 }, { "clip_ratio/high_max": 0.0014188833247317234, "clip_ratio/high_mean": 0.00043401154653111007, "clip_ratio/low_mean": 0.00031582017356868164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007498317309000413, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 641.505615234375, "completions/mean_terminated_length": 594.6119995117188, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 4.2799650043744535, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 266239288.0, "reward": 0.5178571939468384, "reward_std": 0.2029048055410385, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 458 }, { "clip_ratio/high_max": 0.001465431610995438, "clip_ratio/high_mean": 0.0004236939357724623, "clip_ratio/low_mean": 0.00031415979174198583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007378537129625329, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 624.091552734375, "completions/mean_terminated_length": 556.9442138671875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 4.289297171186935, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 266817618.0, "reward": 0.5401785969734192, "reward_std": 0.19892391562461853, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 459 }, { "clip_ratio/high_max": 0.001607860074727796, "clip_ratio/high_mean": 0.0004799057594482292, "clip_ratio/low_mean": 0.00028580797902577615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007657137480236997, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 656.0881958007812, "completions/mean_terminated_length": 601.4863891601562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.298629337999417, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 267448353.0, "reward": 0.5111607313156128, "reward_std": 0.21452394127845764, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 460 }, { "clip_ratio/high_max": 0.0017103273767133942, "clip_ratio/high_mean": 0.0005531097926905204, "clip_ratio/low_mean": 0.0002648757543965985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008179855553862581, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3580.0, "completions/mean_length": 626.6551513671875, "completions/mean_terminated_length": 591.4531860351562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.307961504811899, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 268064324.0, "reward": 0.4955357313156128, "reward_std": 0.22988249361515045, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 461 }, { "clip_ratio/high_max": 0.0013549203795264475, "clip_ratio/high_mean": 0.0004196689631044137, "clip_ratio/low_mean": 0.0002853072559219072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007049762270980864, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 553.388427734375, "completions/mean_terminated_length": 533.5084228515625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.31729367162438, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 268634336.0, "reward": 0.5915178656578064, "reward_std": 0.19009242951869965, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 462 }, { "clip_ratio/high_max": 0.0016750827362557175, "clip_ratio/high_mean": 0.0004493110382099985, "clip_ratio/low_mean": 0.00028353875381981197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007328497913476895, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3642.0, "completions/mean_length": 611.6194458007812, "completions/mean_terminated_length": 556.3118286132812, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 4.326625838436862, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 269215531.0, "reward": 0.5267857313156128, "reward_std": 0.21793846786022186, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 463 }, { "clip_ratio/high_max": 0.0016607362404101877, "clip_ratio/high_mean": 0.0005025412156101083, "clip_ratio/low_mean": 0.0003072622860145202, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008098035068542231, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3282.0, "completions/mean_length": 623.0814819335938, "completions/mean_terminated_length": 571.9512939453125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 4.335958005249344, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 269807380.0, "reward": 0.5569196939468384, "reward_std": 0.22271278500556946, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 464 }, { "clip_ratio/high_max": 0.001639027949750016, "clip_ratio/high_mean": 0.00046428004679910373, "clip_ratio/low_mean": 0.0003137677274480666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007780477669712127, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 573.0803833007812, "completions/mean_terminated_length": 525.2579345703125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 4.3452901720618256, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 270358652.0, "reward": 0.6004464626312256, "reward_std": 0.22149746119976044, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 465 }, { "clip_ratio/high_max": 0.0015635195068171015, "clip_ratio/high_mean": 0.00047154270578175783, "clip_ratio/low_mean": 0.00023050625850373763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007020489679234743, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3958.0, "completions/mean_length": 575.216552734375, "completions/mean_terminated_length": 539.49267578125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 4.354622338874307, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 270924830.0, "reward": 0.6238839626312256, "reward_std": 0.19557398557662964, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 466 }, { "clip_ratio/high_max": 0.0016615856666248874, "clip_ratio/high_mean": 0.0004494111750545926, "clip_ratio/low_mean": 0.00027698924031938077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007264004170792759, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1919.0, "completions/mean_length": 548.8582763671875, "completions/mean_terminated_length": 504.7695007324219, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.363954505686789, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 271459015.0, "reward": 0.574776828289032, "reward_std": 0.18362995982170105, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 467 }, { "clip_ratio/high_max": 0.0015913287443254376, "clip_ratio/high_mean": 0.000531323003997386, "clip_ratio/low_mean": 0.0003833229704923724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000914645991542784, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 608.2745971679688, "completions/mean_terminated_length": 556.9263916015625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.373286672499271, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 272036405.0, "reward": 0.606026828289032, "reward_std": 0.24202153086662292, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 468 }, { "clip_ratio/high_max": 0.0015565556441288209, "clip_ratio/high_mean": 0.0004704969373960921, "clip_ratio/low_mean": 0.000321143754035802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007916406866570469, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 648.560302734375, "completions/mean_terminated_length": 561.7825927734375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 4.3826188393117524, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 272617323.0, "reward": 0.5412946939468384, "reward_std": 0.20662423968315125, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 469 }, { "clip_ratio/high_max": 0.00128650848637335, "clip_ratio/high_mean": 0.00037658480539448647, "clip_ratio/low_mean": 0.00037177639205765445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007483612062060274, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 630.4342041015625, "completions/mean_terminated_length": 595.2705688476562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.391951006124234, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 273241344.0, "reward": 0.5022321939468384, "reward_std": 0.1996411234140396, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 470 }, { "clip_ratio/high_max": 0.001751421723383828, "clip_ratio/high_mean": 0.0005249783489489346, "clip_ratio/low_mean": 0.00033580776062080986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008607861018390395, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 652.114990234375, "completions/mean_terminated_length": 589.4988403320312, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 4.401283172936716, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0221, "num_tokens": 273857735.0, "reward": 0.5267857313156128, "reward_std": 0.20839229226112366, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 471 }, { "clip_ratio/high_max": 0.0015658479151170468, "clip_ratio/high_mean": 0.0004453192345863499, "clip_ratio/low_mean": 0.00031153968529906706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007568589285256166, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 588.75, "completions/mean_terminated_length": 537.1143798828125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 4.410615339749198, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 274423455.0, "reward": 0.5223214626312256, "reward_std": 0.22500258684158325, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 472 }, { "clip_ratio/high_max": 0.0016656261632306268, "clip_ratio/high_mean": 0.0005626158235827461, "clip_ratio/low_mean": 0.0003227071864557729, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008853230140175583, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 609.8002319335938, "completions/mean_terminated_length": 538.3291625976562, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 4.41994750656168, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 274987932.0, "reward": 0.543526828289032, "reward_std": 0.23848573863506317, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 473 }, { "clip_ratio/high_max": 0.001721414178973646, "clip_ratio/high_mean": 0.0006090785182095715, "clip_ratio/low_mean": 0.00033543772758548585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009445162511383387, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 583.8248291015625, "completions/mean_terminated_length": 568.0751342773438, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 4.429279673374162, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0236, "num_tokens": 275585623.0, "reward": 0.5256696939468384, "reward_std": 0.2573162019252777, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 474 }, { "clip_ratio/high_max": 0.0012805054448108422, "clip_ratio/high_mean": 0.0003528819356688473, "clip_ratio/low_mean": 0.00023715787995115534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005900398082303582, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 630.552490234375, "completions/mean_terminated_length": 591.4390869140625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 4.438611840186644, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 276200886.0, "reward": 0.5100446939468384, "reward_std": 0.1762627214193344, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 475 }, { "clip_ratio/high_max": 0.001785341470167623, "clip_ratio/high_mean": 0.0005500541788023838, "clip_ratio/low_mean": 0.0003359141497867313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008859683184709866, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 601.2935791015625, "completions/mean_terminated_length": 541.7922973632812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.447944006999125, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0231, "num_tokens": 276767213.0, "reward": 0.5770089626312256, "reward_std": 0.20233887434005737, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 476 }, { "clip_ratio/high_max": 0.0019143885692756157, "clip_ratio/high_mean": 0.0005483994686983351, "clip_ratio/low_mean": 0.00033269647110500955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008810959270704188, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 555.7991333007812, "completions/mean_terminated_length": 527.9235229492188, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 4.457276173811607, "grad_norm": 0.1513671875, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 277322377.0, "reward": 0.590401828289032, "reward_std": 0.23589354753494263, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 477 }, { "clip_ratio/high_max": 0.0018167906964663416, "clip_ratio/high_mean": 0.0005764182369603077, "clip_ratio/low_mean": 0.0003979794187216612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009743976543177268, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 617.3538208007812, "completions/mean_terminated_length": 570.1323852539062, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 4.466608340624089, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 277913870.0, "reward": 0.5602678656578064, "reward_std": 0.25103527307510376, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 478 }, { "clip_ratio/high_max": 0.0016934892501012655, "clip_ratio/high_mean": 0.0005674093749803433, "clip_ratio/low_mean": 0.0003575389092702608, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009249482809536858, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 569.872802734375, "completions/mean_terminated_length": 542.1080322265625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.475940507436571, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0064, "num_tokens": 278486588.0, "reward": 0.5680803656578064, "reward_std": 0.25153452157974243, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 479 }, { "clip_ratio/high_max": 0.0013954150272184052, "clip_ratio/high_mean": 0.00045888196564192185, "clip_ratio/low_mean": 0.00037074485692301096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000829626822451246, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3491.0, "completions/mean_length": 662.6730346679688, "completions/mean_terminated_length": 584.2864990234375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 4.485272674249052, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0147, "num_tokens": 279092247.0, "reward": 0.5111607313156128, "reward_std": 0.23863616585731506, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 480 }, { "clip_ratio/high_max": 0.0017112914138124324, "clip_ratio/high_mean": 0.0004472731827718235, "clip_ratio/low_mean": 0.00038033553357763594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008276087160083989, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 594.5145263671875, "completions/mean_terminated_length": 510.4788513183594, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 4.494604841061534, "grad_norm": 0.154296875, "learning_rate": 1e-06, "loss": -0.0103, "num_tokens": 279625692.0, "reward": 0.5491071939468384, "reward_std": 0.22048968076705933, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 481 }, { "clip_ratio/high_max": 0.001400075911078602, "clip_ratio/high_mean": 0.0003827551252015837, "clip_ratio/low_mean": 0.0003327557158172567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007155108486358586, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 627.4420166015625, "completions/mean_terminated_length": 568.3859252929688, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 4.503937007874016, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 280218680.0, "reward": 0.5524553656578064, "reward_std": 0.19652535021305084, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751850962638855, "step": 482 }, { "clip_ratio/high_max": 0.0014981317344791023, "clip_ratio/high_mean": 0.0004190168824607099, "clip_ratio/low_mean": 0.00031817220553875813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000737189082428813, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 571.9866333007812, "completions/mean_terminated_length": 507.91363525390625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 4.5132691746864975, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 280745812.0, "reward": 0.6015625, "reward_std": 0.1981329768896103, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 483 }, { "clip_ratio/high_max": 0.0018096462326866458, "clip_ratio/high_mean": 0.0005806429239783029, "clip_ratio/low_mean": 0.0002943162592146109, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008749591661398881, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 652.5301513671875, "completions/mean_terminated_length": 593.9013061523438, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 4.522601341498979, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0205, "num_tokens": 281351335.0, "reward": 0.5178571939468384, "reward_std": 0.21075664460659027, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 484 }, { "clip_ratio/high_max": 0.0016633195700705983, "clip_ratio/high_mean": 0.0005281733760966745, "clip_ratio/low_mean": 0.00027120233403366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007993757121766976, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 588.1317138671875, "completions/mean_terminated_length": 552.5388793945312, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 4.531933508311461, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 281934317.0, "reward": 0.504464328289032, "reward_std": 0.20793946087360382, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 485 }, { "clip_ratio/high_max": 0.0014966129547246965, "clip_ratio/high_mean": 0.00048653811109033995, "clip_ratio/low_mean": 0.0003384272192761273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000824965342417272, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 550.0223388671875, "completions/mean_terminated_length": 510.0, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 4.541265675123943, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 282464529.0, "reward": 0.5993303656578064, "reward_std": 0.215563103556633, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 486 }, { "clip_ratio/high_max": 0.0014841586762486259, "clip_ratio/high_mean": 0.0004888288654001371, "clip_ratio/low_mean": 0.00035418342713455786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000843012291170453, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2152.0, "completions/mean_length": 600.4442138671875, "completions/mean_terminated_length": 556.9966430664062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 4.550597841936424, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 283043479.0, "reward": 0.566964328289032, "reward_std": 0.23120476305484772, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 487 }, { "clip_ratio/high_max": 0.0017360884721711045, "clip_ratio/high_mean": 0.0005348529837192473, "clip_ratio/low_mean": 0.0003435390121921955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008783919965935638, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 560.4955444335938, "completions/mean_terminated_length": 520.5914306640625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 4.559930008748906, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 283590619.0, "reward": 0.6484375, "reward_std": 0.2168864905834198, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 488 }, { "clip_ratio/high_max": 0.00155533077668224, "clip_ratio/high_mean": 0.0004262628510787181, "clip_ratio/low_mean": 0.00034666936699068174, "clip_ratio/low_min": 1.0991910130542237e-05, "clip_ratio/region_mean": 0.0007729322132945526, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 609.171875, "completions/mean_terminated_length": 565.832763671875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 4.569262175561388, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 284184821.0, "reward": 0.5301339626312256, "reward_std": 0.22804802656173706, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 489 }, { "clip_ratio/high_max": 0.0016612641138635809, "clip_ratio/high_mean": 0.0005550469866193453, "clip_ratio/low_mean": 0.000347608870242766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009026558464029222, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3457.0, "completions/mean_length": 593.9285888671875, "completions/mean_terminated_length": 574.276123046875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 4.57859434237387, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 284794053.0, "reward": 0.5189732313156128, "reward_std": 0.21786358952522278, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 490 }, { "clip_ratio/high_max": 0.0017183926356665324, "clip_ratio/high_mean": 0.0005356732121981622, "clip_ratio/low_mean": 0.000266077066271464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000801750290520431, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1681.0, "completions/mean_length": 579.5357666015625, "completions/mean_terminated_length": 535.8282470703125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.587926509186351, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 285354101.0, "reward": 0.613839328289032, "reward_std": 0.22120575606822968, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 491 }, { "clip_ratio/high_max": 0.0013726251263506128, "clip_ratio/high_mean": 0.0004201708183018127, "clip_ratio/low_mean": 0.00048207846452896774, "clip_ratio/low_min": 1.4420857951336075e-05, "clip_ratio/region_mean": 0.0009022492840813356, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 591.4520263671875, "completions/mean_terminated_length": 555.8928833007812, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 4.597258675998834, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 285935482.0, "reward": 0.520089328289032, "reward_std": 0.2368699461221695, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 492 }, { "clip_ratio/high_max": 0.0015729252781966352, "clip_ratio/high_mean": 0.0005027093616263301, "clip_ratio/low_mean": 0.00034286759728274774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008455769602733199, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3202.0, "completions/mean_length": 573.6484375, "completions/mean_terminated_length": 529.8677978515625, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 4.606590842811316, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 286490207.0, "reward": 0.5345982313156128, "reward_std": 0.20546559989452362, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 493 }, { "clip_ratio/high_max": 0.0014307285273389425, "clip_ratio/high_mean": 0.00043732595440815203, "clip_ratio/low_mean": 0.00033495401589789253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007722799809926073, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 597.2142944335938, "completions/mean_terminated_length": 545.7032470703125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 4.615923009623797, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 287051447.0, "reward": 0.566964328289032, "reward_std": 0.22206270694732666, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 494 }, { "clip_ratio/high_max": 0.00183015546099341, "clip_ratio/high_mean": 0.0005407442623663883, "clip_ratio/low_mean": 0.00035960421735126147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009003484856293653, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 565.8092041015625, "completions/mean_terminated_length": 525.9650268554688, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 4.625255176436279, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 287603084.0, "reward": 0.5870535969734192, "reward_std": 0.25040724873542786, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 495 }, { "clip_ratio/high_max": 0.0016625006010144716, "clip_ratio/high_mean": 0.0004916719269658643, "clip_ratio/low_mean": 0.00033617579026667954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008278477125713835, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 626.0658569335938, "completions/mean_terminated_length": 574.9796142578125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 4.634587343248761, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 288202551.0, "reward": 0.5323660969734192, "reward_std": 0.211582213640213, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 496 }, { "clip_ratio/high_max": 0.0015355633304352523, "clip_ratio/high_mean": 0.0004849236019026648, "clip_ratio/low_mean": 0.0002844143895117668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007693379930060473, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 546.359375, "completions/mean_terminated_length": 514.380615234375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 4.6439195100612425, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 288742153.0, "reward": 0.606026828289032, "reward_std": 0.21060511469841003, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 497 }, { "clip_ratio/high_max": 0.001776651070031221, "clip_ratio/high_mean": 0.0005591528490640485, "clip_ratio/low_mean": 0.00031130768547882326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008704605488674133, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 655.8214721679688, "completions/mean_terminated_length": 581.290771484375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 4.653251676873724, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 289341481.0, "reward": 0.527901828289032, "reward_std": 0.21155014634132385, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 498 }, { "clip_ratio/high_max": 0.0015834210566936235, "clip_ratio/high_mean": 0.0005732856652684859, "clip_ratio/low_mean": 0.0003561456413763153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009294313213104033, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 595.716552734375, "completions/mean_terminated_length": 540.156494140625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 4.662583843686206, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 289908931.0, "reward": 0.6149553656578064, "reward_std": 0.24273650348186493, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 499 }, { "clip_ratio/high_max": 0.0014789197302889079, "clip_ratio/high_mean": 0.0004162362445185863, "clip_ratio/low_mean": 0.0003480468201360054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007642830669283285, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 629.9910888671875, "completions/mean_terminated_length": 558.9339599609375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 4.671916010498688, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 290490819.0, "reward": 0.5613839626312256, "reward_std": 0.20560871064662933, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 500 }, { "clip_ratio/high_max": 0.0016843309413161478, "clip_ratio/high_mean": 0.0004671865392538166, "clip_ratio/low_mean": 0.0002480672757201319, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000715253814632888, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 627.794677734375, "completions/mean_terminated_length": 560.718994140625, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 4.681248177311169, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 291078891.0, "reward": 0.4977678656578064, "reward_std": 0.20437045395374298, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 501 }, { "clip_ratio/high_max": 0.0013590634198408225, "clip_ratio/high_mean": 0.00048674410629701015, "clip_ratio/low_mean": 0.00043030665983678773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009170507692033425, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 623.8783569335938, "completions/mean_terminated_length": 584.6896362304688, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 4.690580344123651, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 291695502.0, "reward": 0.494419664144516, "reward_std": 0.25100386142730713, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 502 }, { "clip_ratio/high_max": 0.0017423807448722073, "clip_ratio/high_mean": 0.0005437559307210904, "clip_ratio/low_mean": 0.00040957945202535484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009533353813822032, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 649.2332763671875, "completions/mean_terminated_length": 562.4725341796875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.699912510936133, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 292281383.0, "reward": 0.4988839626312256, "reward_std": 0.2477276772260666, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 503 }, { "clip_ratio/high_max": 0.0014809887270530453, "clip_ratio/high_mean": 0.00043425572494015796, "clip_ratio/low_mean": 0.00035411961005138437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000788375347838155, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 643.568115234375, "completions/mean_terminated_length": 588.767578125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 4.7092446777486145, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0139, "num_tokens": 292892268.0, "reward": 0.53125, "reward_std": 0.21447932720184326, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 504 }, { "clip_ratio/high_max": 0.0012838945476687513, "clip_ratio/high_mean": 0.0003630980708067, "clip_ratio/low_mean": 0.0003246117719299946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006877098348923028, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 635.3292846679688, "completions/mean_terminated_length": 568.3992919921875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 4.718576844561096, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 293486659.0, "reward": 0.5189732313156128, "reward_std": 0.19125540554523468, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 505 }, { "clip_ratio/high_max": 0.0013454458530759439, "clip_ratio/high_mean": 0.00038541975618500146, "clip_ratio/low_mean": 0.000398620349869816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007840401012799703, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 625.091552734375, "completions/mean_terminated_length": 569.9977416992188, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 4.727909011373578, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 294078909.0, "reward": 0.5078125, "reward_std": 0.1907668560743332, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 506 }, { "clip_ratio/high_max": 0.0018093389862769982, "clip_ratio/high_mean": 0.000492179358161593, "clip_ratio/low_mean": 0.0003963819007140046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008885612569429213, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2044.0, "completions/mean_length": 538.8717041015625, "completions/mean_terminated_length": 506.8254699707031, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 4.73724117818606, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 294627082.0, "reward": 0.5859375, "reward_std": 0.20790556073188782, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 507 }, { "clip_ratio/high_max": 0.0013911390997236595, "clip_ratio/high_mean": 0.00044665890368378314, "clip_ratio/low_mean": 0.0003579561971491785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008046150978771038, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3191.0, "completions/mean_length": 673.943115234375, "completions/mean_terminated_length": 607.7599487304688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 4.746573344998541, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 295256015.0, "reward": 0.4921875298023224, "reward_std": 0.24217379093170166, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 508 }, { "clip_ratio/high_max": 0.0017413587320334045, "clip_ratio/high_mean": 0.0006114172979323484, "clip_ratio/low_mean": 0.0003403451073609176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009517624075670028, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2225.0, "completions/mean_length": 603.8136596679688, "completions/mean_terminated_length": 520.0011596679688, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 4.755905511811024, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 295804640.0, "reward": 0.5558035969734192, "reward_std": 0.23353983461856842, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 509 }, { "clip_ratio/high_max": 0.0019195449704056955, "clip_ratio/high_mean": 0.0006316532999335323, "clip_ratio/low_mean": 0.0003624669336659281, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009941202315530973, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3821.0, "completions/mean_length": 598.1752319335938, "completions/mean_terminated_length": 570.63330078125, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 4.765237678623506, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 296396533.0, "reward": 0.6171875, "reward_std": 0.24901039898395538, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 510 }, { "clip_ratio/high_max": 0.0014497905503958464, "clip_ratio/high_mean": 0.0004942071557252348, "clip_ratio/low_mean": 0.00029100762981215667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007852147855373914, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 627.5178833007812, "completions/mean_terminated_length": 608.0538940429688, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 4.7745698454359875, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.023, "num_tokens": 297041293.0, "reward": 0.5301339626312256, "reward_std": 0.24089357256889343, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 511 }, { "clip_ratio/high_max": 0.001688839212874882, "clip_ratio/high_mean": 0.0005524357045487704, "clip_ratio/low_mean": 0.0003401884688400969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008926241744120489, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1510.0, "completions/mean_length": 544.1350708007812, "completions/mean_terminated_length": 483.66064453125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 4.783902012248469, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 297552446.0, "reward": 0.574776828289032, "reward_std": 0.21643507480621338, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 512 }, { "clip_ratio/high_max": 0.00187992444352858, "clip_ratio/high_mean": 0.0005772723015979864, "clip_ratio/low_mean": 0.00031451219206246606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008917844893403526, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3439.0, "completions/mean_length": 601.372802734375, "completions/mean_terminated_length": 553.9343872070312, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 4.793234179060951, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 298134436.0, "reward": 0.5580357313156128, "reward_std": 0.19997744262218475, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 513 }, { "clip_ratio/high_max": 0.0016608175455985474, "clip_ratio/high_mean": 0.0005447676526273426, "clip_ratio/low_mean": 0.00028974903091238957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008345166697836248, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 622.1350708007812, "completions/mean_terminated_length": 554.949951171875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 4.802566345873433, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 298703125.0, "reward": 0.5424107313156128, "reward_std": 0.22202809154987335, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 514 }, { "clip_ratio/high_max": 0.0018567469451227225, "clip_ratio/high_mean": 0.0005445421916192572, "clip_ratio/low_mean": 0.0003416407251961573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008861829123816278, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 657.4140625, "completions/mean_terminated_length": 598.8683471679688, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 4.811898512685914, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 299327504.0, "reward": 0.5334821939468384, "reward_std": 0.2286778688430786, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 515 }, { "clip_ratio/high_max": 0.0014091908105910989, "clip_ratio/high_mean": 0.0004355001061639996, "clip_ratio/low_mean": 0.00039597377326572314, "clip_ratio/low_min": 1.1918383279407863e-05, "clip_ratio/region_mean": 0.0008314738761328044, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 612.536865234375, "completions/mean_terminated_length": 557.2437744140625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 4.821230679498396, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0131, "num_tokens": 299913089.0, "reward": 0.5066964626312256, "reward_std": 0.22579282522201538, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 516 }, { "clip_ratio/high_max": 0.0017064566127373837, "clip_ratio/high_mean": 0.0005202652157549892, "clip_ratio/low_mean": 0.0003574382284341482, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008777034604463552, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 614.1373291015625, "completions/mean_terminated_length": 562.8754272460938, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 4.830562846310878, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 300498980.0, "reward": 0.5, "reward_std": 0.2103448063135147, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 517 }, { "clip_ratio/high_max": 0.0016321075872838264, "clip_ratio/high_mean": 0.00046111401070447755, "clip_ratio/low_mean": 0.0002837311283201416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007448451387972455, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 645.513427734375, "completions/mean_terminated_length": 594.7134399414062, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 4.83989501312336, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": 0.0133, "num_tokens": 301106424.0, "reward": 0.5714285969734192, "reward_std": 0.1872738152742386, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 518 }, { "clip_ratio/high_max": 0.001791798085832852, "clip_ratio/high_mean": 0.0005298276229268595, "clip_ratio/low_mean": 0.0003157693417961127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008455969691567589, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3740.0, "completions/mean_length": 628.6261596679688, "completions/mean_terminated_length": 577.5775756835938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 4.849227179935841, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0168, "num_tokens": 301699529.0, "reward": 0.5524553656578064, "reward_std": 0.2000548094511032, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 519 }, { "clip_ratio/high_max": 0.0015914729137875838, "clip_ratio/high_mean": 0.0005042617715389497, "clip_ratio/low_mean": 0.00036590810123016126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008701698752702214, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 654.6295166015625, "completions/mean_terminated_length": 576.059326171875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.858559346748323, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 302291989.0, "reward": 0.4955357313156128, "reward_std": 0.21714931726455688, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 520 }, { "clip_ratio/high_max": 0.0016609390340818209, "clip_ratio/high_mean": 0.000485138586782341, "clip_ratio/low_mean": 0.0002607448440130611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007458834279532311, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 638.8035888671875, "completions/mean_terminated_length": 583.9274291992188, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 4.867891513560805, "grad_norm": 0.10986328125, "learning_rate": 1e-06, "loss": 0.0137, "num_tokens": 302899901.0, "reward": 0.5691964626312256, "reward_std": 0.19189411401748657, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 521 }, { "clip_ratio/high_max": 0.0016318977886840003, "clip_ratio/high_mean": 0.0004224451587333533, "clip_ratio/low_mean": 0.00042914150799333584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008515866561538132, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 588.3939819335938, "completions/mean_terminated_length": 516.4840698242188, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 4.8772236803732865, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0199, "num_tokens": 303438342.0, "reward": 0.6004464626312256, "reward_std": 0.19746080040931702, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 522 }, { "clip_ratio/high_max": 0.0017518207860121038, "clip_ratio/high_mean": 0.0005529486968498531, "clip_ratio/low_mean": 0.00034203485483885743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008949835546445684, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1923.0, "completions/mean_length": 580.65625, "completions/mean_terminated_length": 540.9796752929688, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 4.886555847185768, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 304014050.0, "reward": 0.4642857313156128, "reward_std": 0.2295461893081665, "rewards/verify_math_reward/mean": 0.4642857015132904, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 523 }, { "clip_ratio/high_max": 0.001817819624193362, "clip_ratio/high_mean": 0.000533803677853939, "clip_ratio/low_mean": 0.0002993256985064363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008331293784067384, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 565.1373291015625, "completions/mean_terminated_length": 521.2508544921875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 4.89588801399825, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 304566661.0, "reward": 0.5636160969734192, "reward_std": 0.18979115784168243, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 524 }, { "clip_ratio/high_max": 0.0014290894741861848, "clip_ratio/high_mean": 0.00046198587665458035, "clip_ratio/low_mean": 0.0002933630476036342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007553489303973038, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 655.3370971679688, "completions/mean_terminated_length": 592.779541015625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 4.905220180810732, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 305184483.0, "reward": 0.4732142984867096, "reward_std": 0.22695286571979523, "rewards/verify_math_reward/mean": 0.4732142984867096, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 525 }, { "clip_ratio/high_max": 0.0013120018265908584, "clip_ratio/high_mean": 0.00043393694613769185, "clip_ratio/low_mean": 0.0002596845182551988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006936214717825351, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3692.0, "completions/mean_length": 673.5267944335938, "completions/mean_terminated_length": 579.3302612304688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 4.914552347623214, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 305781475.0, "reward": 0.5368303656578064, "reward_std": 0.19933508336544037, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 526 }, { "clip_ratio/high_max": 0.001622979414605652, "clip_ratio/high_mean": 0.00045199954479357984, "clip_ratio/low_mean": 0.0003581312797678038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008101308358163806, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 603.1953125, "completions/mean_terminated_length": 535.6439208984375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 4.923884514435695, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 306341770.0, "reward": 0.512276828289032, "reward_std": 0.19791541993618011, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 527 }, { "clip_ratio/high_max": 0.0017586277590453392, "clip_ratio/high_mean": 0.0005600705312645005, "clip_ratio/low_mean": 0.00034087833500962006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009009488585434156, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3627.0, "completions/mean_length": 626.2020263671875, "completions/mean_terminated_length": 559.0955200195312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 4.933216681248178, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0066, "num_tokens": 306922247.0, "reward": 0.5870535969734192, "reward_std": 0.2231585681438446, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 528 }, { "clip_ratio/high_max": 0.0018863202985812677, "clip_ratio/high_mean": 0.0005989270262034552, "clip_ratio/low_mean": 0.0003069568422233715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009058838868440944, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 568.3482666015625, "completions/mean_terminated_length": 540.5714721679688, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 4.942548848060659, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0069, "num_tokens": 307482927.0, "reward": 0.5691964626312256, "reward_std": 0.24495214223861694, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 529 }, { "clip_ratio/high_max": 0.0015429682252943167, "clip_ratio/high_mean": 0.00048443989123825304, "clip_ratio/low_mean": 0.0004059024936395872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008903423749870853, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2187.0, "completions/mean_length": 613.6752319335938, "completions/mean_terminated_length": 562.4065551757812, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 4.951881014873141, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 308069340.0, "reward": 0.5915178656578064, "reward_std": 0.2319568395614624, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 530 }, { "clip_ratio/high_max": 0.0013118975239194697, "clip_ratio/high_mean": 0.0003627593423516373, "clip_ratio/low_mean": 0.0002987153034155199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006614746437207941, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 582.3114013671875, "completions/mean_terminated_length": 526.53857421875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 4.961213181685623, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0084, "num_tokens": 308617995.0, "reward": 0.6484375, "reward_std": 0.1827705055475235, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 531 }, { "clip_ratio/high_max": 0.0015116052236407995, "clip_ratio/high_mean": 0.00044818270077939815, "clip_ratio/low_mean": 0.00041221798005608434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008604006825407851, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2991.0, "completions/mean_length": 592.599365234375, "completions/mean_terminated_length": 565.0135498046875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.970545348498105, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 309199980.0, "reward": 0.5401785969734192, "reward_std": 0.22398200631141663, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 532 }, { "clip_ratio/high_max": 0.0017853919598564971, "clip_ratio/high_mean": 0.0004675755010339344, "clip_ratio/low_mean": 0.0002622930529696532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007298685522982851, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 624.0546875, "completions/mean_terminated_length": 552.8758544921875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 4.979877515310586, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 309778109.0, "reward": 0.5424107313156128, "reward_std": 0.18701308965682983, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 533 }, { "clip_ratio/high_max": 0.001457198579373653, "clip_ratio/high_mean": 0.00040370797762534494, "clip_ratio/low_mean": 0.0003826118379492982, "clip_ratio/low_min": 1.0232482054561842e-05, "clip_ratio/region_mean": 0.0007863198206905508, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 654.2199096679688, "completions/mean_terminated_length": 567.5846557617188, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 4.989209682123068, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 310366386.0, "reward": 0.5022321939468384, "reward_std": 0.22303980588912964, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 534 }, { "clip_ratio/high_max": 0.001458971493775607, "clip_ratio/high_mean": 0.00043968069712718716, "clip_ratio/low_mean": 0.00028276925286263577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007224499568110332, "completions/clipped_ratio": 0.014204545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 1857.0, "completions/mean_length": 556.3863525390625, "completions/mean_terminated_length": 505.3832702636719, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 4.99854184893555, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 310945778.0, "reward": 0.5703125, "reward_std": 0.17379067838191986, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 535 }, { "clip_ratio/high_max": 0.0017452636657253606, "clip_ratio/high_mean": 0.0005312801556556224, "clip_ratio/low_mean": 0.0003431514528529078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008744316155571141, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 594.6629638671875, "completions/mean_terminated_length": 559.1364135742188, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 5.009332166812482, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 311519324.0, "reward": 0.6127232313156128, "reward_std": 0.2122662216424942, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 536 }, { "clip_ratio/high_max": 0.0015920778469080688, "clip_ratio/high_mean": 0.0004222492743792827, "clip_ratio/low_mean": 0.00038444910774160235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008066983773460379, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 639.763427734375, "completions/mean_terminated_length": 572.919189453125, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 5.0186643336249634, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 312104640.0, "reward": 0.4966517984867096, "reward_std": 0.21425220370292664, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 537 }, { "clip_ratio/high_max": 0.0012568632373586297, "clip_ratio/high_mean": 0.00036879289780245017, "clip_ratio/low_mean": 0.0003587537908060767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007275466850842349, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 708.7288208007812, "completions/mean_terminated_length": 607.5, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.027996500437445, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0182, "num_tokens": 312723525.0, "reward": 0.5078125, "reward_std": 0.23345358669757843, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 538 }, { "clip_ratio/high_max": 0.0016520366107215523, "clip_ratio/high_mean": 0.0005098993252659056, "clip_ratio/low_mean": 0.000413374274103262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009232736074409331, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 638.755615234375, "completions/mean_terminated_length": 543.60205078125, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 5.037328667249927, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 313290154.0, "reward": 0.5301339626312256, "reward_std": 0.23131422698497772, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 539 }, { "clip_ratio/high_max": 0.0018554929847596213, "clip_ratio/high_mean": 0.0006344309333599085, "clip_ratio/low_mean": 0.000356605803972343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009910367325574043, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 603.2064819335938, "completions/mean_terminated_length": 547.7653198242188, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 5.046660834062409, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 313856395.0, "reward": 0.59375, "reward_std": 0.25690361857414246, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 540 }, { "clip_ratio/high_max": 0.0020592304954334395, "clip_ratio/high_mean": 0.0006356832564051729, "clip_ratio/low_mean": 0.00038154293611114554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010172261918341974, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 584.7109375, "completions/mean_terminated_length": 541.0678100585938, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 5.05599300087489, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 314418168.0, "reward": 0.5680803656578064, "reward_std": 0.2286132574081421, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 541 }, { "clip_ratio/high_max": 0.0015770561394674587, "clip_ratio/high_mean": 0.0004937669634728081, "clip_ratio/low_mean": 0.0003279400937117316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008217070571845397, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2069.0, "completions/mean_length": 591.0022583007812, "completions/mean_terminated_length": 543.423095703125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.065325167687372, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 314981090.0, "reward": 0.535714328289032, "reward_std": 0.23198752105236053, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 542 }, { "clip_ratio/high_max": 0.0014711573840031633, "clip_ratio/high_mean": 0.00045135759103231976, "clip_ratio/low_mean": 0.00039902060279928264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008503782141815464, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3578.0, "completions/mean_length": 591.724365234375, "completions/mean_terminated_length": 552.1727294921875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 5.074657334499854, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0139, "num_tokens": 315557763.0, "reward": 0.5189732313156128, "reward_std": 0.23833489418029785, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 543 }, { "clip_ratio/high_max": 0.0013870163129467983, "clip_ratio/high_mean": 0.0003142091329664254, "clip_ratio/low_mean": 0.00029854387867089827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006127530073172238, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 593.5324096679688, "completions/mean_terminated_length": 557.9943237304688, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 5.083989501312336, "grad_norm": 0.11279296875, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 316146208.0, "reward": 0.5524553656578064, "reward_std": 0.17405030131340027, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 544 }, { "clip_ratio/high_max": 0.0009673911044956185, "clip_ratio/high_mean": 0.0002331657036620527, "clip_ratio/low_mean": 0.0002735016506676402, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005066673679721134, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3570.0, "completions/mean_length": 591.6473388671875, "completions/mean_terminated_length": 568.0224609375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 5.093321668124818, "grad_norm": 0.1064453125, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 316751004.0, "reward": 0.4464285969734192, "reward_std": 0.15596871078014374, "rewards/verify_math_reward/mean": 0.4464285671710968, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 545 }, { "clip_ratio/high_max": 0.0015565626936222543, "clip_ratio/high_mean": 0.00043005349834857043, "clip_ratio/low_mean": 0.00026414494732307503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006941984429431614, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 588.6864013671875, "completions/mean_terminated_length": 528.9705200195312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 5.1026538349373, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 317299995.0, "reward": 0.5770089626312256, "reward_std": 0.19387967884540558, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099617958069, "step": 546 }, { "clip_ratio/high_max": 0.0015786259682499804, "clip_ratio/high_mean": 0.0005050115909170927, "clip_ratio/low_mean": 0.00023925772779875842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007442693167831749, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 591.5971069335938, "completions/mean_terminated_length": 531.9307861328125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 5.111986001749782, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 317858642.0, "reward": 0.5803571939468384, "reward_std": 0.18228377401828766, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 547 }, { "clip_ratio/high_max": 0.0016671581252012402, "clip_ratio/high_mean": 0.0005753812163220573, "clip_ratio/low_mean": 0.00031317391835727904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008885551487765042, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1895.0, "completions/mean_length": 567.0658569335938, "completions/mean_terminated_length": 535.273681640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.121318168562263, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 318415933.0, "reward": 0.6004464626312256, "reward_std": 0.22492702305316925, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 548 }, { "clip_ratio/high_max": 0.001725742742564762, "clip_ratio/high_mean": 0.000571799853560151, "clip_ratio/low_mean": 0.000325643508858775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008974433749244781, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 642.2277221679688, "completions/mean_terminated_length": 595.3439331054688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 5.130650335374745, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 319033209.0, "reward": 0.5613839626312256, "reward_std": 0.22974373400211334, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 549 }, { "clip_ratio/high_max": 0.0011382587290427182, "clip_ratio/high_mean": 0.00030806649988335266, "clip_ratio/low_mean": 0.0002933993080205255, "clip_ratio/low_min": 1.544735459901858e-05, "clip_ratio/region_mean": 0.0006014658119966043, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 592.9832763671875, "completions/mean_terminated_length": 553.4458618164062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 5.139982502187227, "grad_norm": 0.11181640625, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 319622818.0, "reward": 0.4520089626312256, "reward_std": 0.15977369248867035, "rewards/verify_math_reward/mean": 0.4520089328289032, "rewards/verify_math_reward/std": 0.49796947836875916, "step": 550 }, { "clip_ratio/high_max": 0.0018136503895220812, "clip_ratio/high_mean": 0.00048139415093828575, "clip_ratio/low_mean": 0.0003645178137503535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008459119626422762, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2129.0, "completions/mean_length": 545.9553833007812, "completions/mean_terminated_length": 518.0022583007812, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 5.1493146689997085, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 320171658.0, "reward": 0.5892857313156128, "reward_std": 0.18333503603935242, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 551 }, { "clip_ratio/high_max": 0.0017637156015553046, "clip_ratio/high_mean": 0.0005619154640044144, "clip_ratio/low_mean": 0.00030062308655942616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008625385489722248, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3447.0, "completions/mean_length": 636.1517944335938, "completions/mean_terminated_length": 601.0462036132812, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 5.15864683581219, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 320792066.0, "reward": 0.5256696939468384, "reward_std": 0.22361180186271667, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 552 }, { "clip_ratio/high_max": 0.0013090748489048565, "clip_ratio/high_mean": 0.00036141685393431544, "clip_ratio/low_mean": 0.00035929172508986085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007207085664049373, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 593.4386596679688, "completions/mean_terminated_length": 557.899658203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.167979002624672, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 321373147.0, "reward": 0.551339328289032, "reward_std": 0.20215661823749542, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 553 }, { "clip_ratio/high_max": 0.001539472751574067, "clip_ratio/high_mean": 0.00043860073469659255, "clip_ratio/low_mean": 0.0003451004399721569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000783701170803397, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 628.9453125, "completions/mean_terminated_length": 569.9149169921875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 5.177311169437154, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 321962658.0, "reward": 0.5212053656578064, "reward_std": 0.20426209270954132, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 554 }, { "clip_ratio/high_max": 0.0013414804416242987, "clip_ratio/high_mean": 0.00043196804767831054, "clip_ratio/low_mean": 0.00035095157750220096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007829196079001122, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 633.3236694335938, "completions/mean_terminated_length": 574.3677978515625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 5.186643336249635, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 322554580.0, "reward": 0.5334821939468384, "reward_std": 0.2213120013475418, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915629625320435, "step": 555 }, { "clip_ratio/high_max": 0.0014922396949259564, "clip_ratio/high_mean": 0.0004467867923949598, "clip_ratio/low_mean": 0.00029836162491392315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007451484184457513, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 591.4475708007812, "completions/mean_terminated_length": 547.8881225585938, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 5.195975503062117, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 323130573.0, "reward": 0.6160714626312256, "reward_std": 0.20819656550884247, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 556 }, { "clip_ratio/high_max": 0.001416543123923475, "clip_ratio/high_mean": 0.0004352897617536655, "clip_ratio/low_mean": 0.0003223823791813629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007576721350233129, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2090.0, "completions/mean_length": 653.6484375, "completions/mean_terminated_length": 595.0386352539062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 5.205307669874599, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 323743770.0, "reward": 0.5290178656578064, "reward_std": 0.2077540010213852, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 557 }, { "clip_ratio/high_max": 0.0016756152781454148, "clip_ratio/high_mean": 0.0005096179843349091, "clip_ratio/low_mean": 0.0005116133927458577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001021231357299257, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 607.3236694335938, "completions/mean_terminated_length": 555.9614868164062, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 5.2146398366870805, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 324330212.0, "reward": 0.5613839626312256, "reward_std": 0.2486380934715271, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 558 }, { "clip_ratio/high_max": 0.0015683017918490805, "clip_ratio/high_mean": 0.0004760605506817228, "clip_ratio/low_mean": 0.0004015500310288189, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008776105796641787, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 646.7645263671875, "completions/mean_terminated_length": 572.03759765625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.223972003499562, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 324917201.0, "reward": 0.5580357313156128, "reward_std": 0.23758603632450104, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 559 }, { "clip_ratio/high_max": 0.0014849870021862444, "clip_ratio/high_mean": 0.0004950290035594662, "clip_ratio/low_mean": 0.0002708091083150066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007658381196051778, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 614.6495971679688, "completions/mean_terminated_length": 547.3196411132812, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 5.233304170312044, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 325488015.0, "reward": 0.4888392984867096, "reward_std": 0.19798003137111664, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 560 }, { "clip_ratio/high_max": 0.0013966550609438855, "clip_ratio/high_mean": 0.00037200242871904265, "clip_ratio/low_mean": 0.00021488170978045673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005868841367373534, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 630.3069458007812, "completions/mean_terminated_length": 567.2943115234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.242636337124526, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0173, "num_tokens": 326078362.0, "reward": 0.582589328289032, "reward_std": 0.16311585903167725, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 561 }, { "clip_ratio/high_max": 0.0016356369869754417, "clip_ratio/high_mean": 0.0005107918336761941, "clip_ratio/low_mean": 0.00031867606321611675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000829467900985037, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1975.0, "completions/mean_length": 573.1685791015625, "completions/mean_terminated_length": 541.4313354492188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 5.251968503937007, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 326644473.0, "reward": 0.5345982313156128, "reward_std": 0.2242858111858368, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 562 }, { "clip_ratio/high_max": 0.0015790890902280807, "clip_ratio/high_mean": 0.0005361258564562377, "clip_ratio/low_mean": 0.00045058669729769463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009867125536402455, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2186.0, "completions/mean_length": 601.5480346679688, "completions/mean_terminated_length": 554.1119995117188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.26130067074949, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 327219580.0, "reward": 0.5145089626312256, "reward_std": 0.2533339262008667, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 563 }, { "clip_ratio/high_max": 0.0015544271627732087, "clip_ratio/high_mean": 0.00044894331995237735, "clip_ratio/low_mean": 0.0003341211422593915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007830644690329791, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2676.0, "completions/mean_length": 673.8928833007812, "completions/mean_terminated_length": 603.7357788085938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.270632837561972, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0064, "num_tokens": 327849308.0, "reward": 0.447544664144516, "reward_std": 0.21725627779960632, "rewards/verify_math_reward/mean": 0.4475446343421936, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 564 }, { "clip_ratio/high_max": 0.0016612288181931945, "clip_ratio/high_mean": 0.00047205458122334676, "clip_ratio/low_mean": 0.0005131374111897458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009851919849097612, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 568.7265625, "completions/mean_terminated_length": 544.9472045898438, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 5.2799650043744535, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 328425319.0, "reward": 0.535714328289032, "reward_std": 0.25066685676574707, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990013837814331, "step": 565 }, { "clip_ratio/high_max": 0.0017810704557632562, "clip_ratio/high_mean": 0.0004967451700395031, "clip_ratio/low_mean": 0.00039699774970358703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008937429438446998, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 581.0670166015625, "completions/mean_terminated_length": 545.4024658203125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 5.289297171186935, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 328994995.0, "reward": 0.5691964626312256, "reward_std": 0.2088063806295395, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 566 }, { "clip_ratio/high_max": 0.0017589321250852663, "clip_ratio/high_mean": 0.0004996654330398087, "clip_ratio/low_mean": 0.0002853683243984051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007850337747186131, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 624.4319458007812, "completions/mean_terminated_length": 573.3215942382812, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 5.298629337999417, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 329586702.0, "reward": 0.515625, "reward_std": 0.24036115407943726, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 567 }, { "clip_ratio/high_max": 0.001833881327911513, "clip_ratio/high_mean": 0.0005528118585971242, "clip_ratio/low_mean": 0.00028448922512325225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008373010800823977, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2299.0, "completions/mean_length": 616.3125, "completions/mean_terminated_length": 540.9258422851562, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 5.307961504811899, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 330147854.0, "reward": 0.582589328289032, "reward_std": 0.1862967312335968, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 568 }, { "clip_ratio/high_max": 0.0013719777434744174, "clip_ratio/high_mean": 0.00037502635007058416, "clip_ratio/low_mean": 0.0003804965200515653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007555228744422493, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 627.7366333007812, "completions/mean_terminated_length": 568.6856079101562, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 5.31729367162438, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 330741122.0, "reward": 0.5256696939468384, "reward_std": 0.2009527087211609, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 569 }, { "clip_ratio/high_max": 0.0016941607818807825, "clip_ratio/high_mean": 0.0005355355674510065, "clip_ratio/low_mean": 0.00033066106607293477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008661966294312151, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 647.3850708007812, "completions/mean_terminated_length": 572.6715698242188, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.326625838436862, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 331328323.0, "reward": 0.5290178656578064, "reward_std": 0.22282226383686066, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 570 }, { "clip_ratio/high_max": 0.0018209992931588204, "clip_ratio/high_mean": 0.0005488518972924794, "clip_ratio/low_mean": 0.0004011082201031968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009499601151219395, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3269.0, "completions/mean_length": 617.2042846679688, "completions/mean_terminated_length": 549.9237670898438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.335958005249344, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 331893930.0, "reward": 0.574776828289032, "reward_std": 0.2292449027299881, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 571 }, { "clip_ratio/high_max": 0.0015630527213943424, "clip_ratio/high_mean": 0.0004424715377808752, "clip_ratio/low_mean": 0.00027531418140824826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007177857105489238, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2148.0, "completions/mean_length": 590.4631958007812, "completions/mean_terminated_length": 550.8972778320312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 5.3452901720618256, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 332474713.0, "reward": 0.4966517984867096, "reward_std": 0.21894694864749908, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 572 }, { "clip_ratio/high_max": 0.0016773431261754013, "clip_ratio/high_mean": 0.0005180152492130219, "clip_ratio/low_mean": 0.0003890500597663049, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000907065300452814, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 608.5100708007812, "completions/mean_terminated_length": 549.1317138671875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 5.354622338874307, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 333053074.0, "reward": 0.590401828289032, "reward_std": 0.22781910002231598, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 573 }, { "clip_ratio/high_max": 0.0015063100163388299, "clip_ratio/high_mean": 0.00047234158000719617, "clip_ratio/low_mean": 0.000319266402584617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007916079744063609, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2021.0, "completions/mean_length": 548.0, "completions/mean_terminated_length": 503.90057373046875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 5.363954505686789, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0094, "num_tokens": 333589578.0, "reward": 0.5613839626312256, "reward_std": 0.19550618529319763, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 574 }, { "clip_ratio/high_max": 0.001743977880323655, "clip_ratio/high_mean": 0.0005185850332054542, "clip_ratio/low_mean": 0.0002559582160301943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007745432667434216, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 654.4375, "completions/mean_terminated_length": 591.8636474609375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 5.373286672499271, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 334202898.0, "reward": 0.5212053656578064, "reward_std": 0.20478273928165436, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 575 }, { "clip_ratio/high_max": 0.0017537659587105736, "clip_ratio/high_mean": 0.0004901738057014882, "clip_ratio/low_mean": 0.0003412311627926101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008314049746331875, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2358.0, "completions/mean_length": 621.1038208007812, "completions/mean_terminated_length": 549.864501953125, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 5.3826188393117524, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 334782223.0, "reward": 0.5502232313156128, "reward_std": 0.22882941365242004, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 576 }, { "clip_ratio/high_max": 0.0012921654160891194, "clip_ratio/high_mean": 0.0003068975581754785, "clip_ratio/low_mean": 0.0002428725406389276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005497700994965271, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3291.0, "completions/mean_length": 640.5736694335938, "completions/mean_terminated_length": 569.7335205078125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 5.391951006124234, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 335369025.0, "reward": 0.6026785969734192, "reward_std": 0.1888531893491745, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 577 }, { "clip_ratio/high_max": 0.0015022727311588824, "clip_ratio/high_mean": 0.00045029936018181616, "clip_ratio/low_mean": 0.000329555451799024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007798548226674029, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 577.747802734375, "completions/mean_terminated_length": 542.049560546875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 5.401283172936716, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 335945447.0, "reward": 0.5870535969734192, "reward_std": 0.20580121874809265, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 578 }, { "clip_ratio/high_max": 0.0015232671194098657, "clip_ratio/high_mean": 0.0004816653081434197, "clip_ratio/low_mean": 0.0003052244320542741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000786889742812491, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 665.099365234375, "completions/mean_terminated_length": 598.7451171875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 5.410615339749198, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0147, "num_tokens": 336565784.0, "reward": 0.5189732313156128, "reward_std": 0.21312426030635834, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 579 }, { "clip_ratio/high_max": 0.0018734333134489134, "clip_ratio/high_mean": 0.0005695729123544879, "clip_ratio/low_mean": 0.00034296873502626113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009125416563620092, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 627.1060791015625, "completions/mean_terminated_length": 560.0170288085938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 5.41994750656168, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 337143775.0, "reward": 0.5569196939468384, "reward_std": 0.22883333265781403, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 580 }, { "clip_ratio/high_max": 0.0016861082176546915, "clip_ratio/high_mean": 0.0004648963398494743, "clip_ratio/low_mean": 0.0003388802524568746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008037765906010463, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3145.0, "completions/mean_length": 586.6964721679688, "completions/mean_terminated_length": 543.0780029296875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.429279673374162, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 337710623.0, "reward": 0.559151828289032, "reward_std": 0.20156000554561615, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 581 }, { "clip_ratio/high_max": 0.001424003468855517, "clip_ratio/high_mean": 0.00037773066139834555, "clip_ratio/low_mean": 0.0003189537942489551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006966844571252295, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 643.5580444335938, "completions/mean_terminated_length": 564.735107421875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 5.438611840186644, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0234, "num_tokens": 338286859.0, "reward": 0.5703125, "reward_std": 0.18426935374736786, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 582 }, { "clip_ratio/high_max": 0.0012338231508692843, "clip_ratio/high_mean": 0.00044580090559520613, "clip_ratio/low_mean": 0.00031469859845856263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007604994998473558, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2355.0, "completions/mean_length": 579.0324096679688, "completions/mean_terminated_length": 535.3186645507812, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 5.447944006999125, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 338843504.0, "reward": 0.5792410969734192, "reward_std": 0.22800594568252563, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 583 }, { "clip_ratio/high_max": 0.0021068601017759647, "clip_ratio/high_mean": 0.0006619417997626442, "clip_ratio/low_mean": 0.00037777678517159075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001039718593347061, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1972.0, "completions/mean_length": 582.5614013671875, "completions/mean_terminated_length": 554.8965454101562, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 5.457276173811607, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 339421447.0, "reward": 0.5345982313156128, "reward_std": 0.24487334489822388, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 584 }, { "clip_ratio/high_max": 0.0016412534496339504, "clip_ratio/high_mean": 0.0004804542854799365, "clip_ratio/low_mean": 0.000321047708439437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008015020030143205, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 660.6205444335938, "completions/mean_terminated_length": 586.19384765625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 5.466608340624089, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 340032507.0, "reward": 0.5234375, "reward_std": 0.2059527486562729, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 585 }, { "clip_ratio/high_max": 0.0015810954027983826, "clip_ratio/high_mean": 0.0005144460722021904, "clip_ratio/low_mean": 0.0003890473876708711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009034934582814458, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 613.6808471679688, "completions/mean_terminated_length": 542.289306640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.475940507436571, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 340596373.0, "reward": 0.515625, "reward_std": 0.22308439016342163, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 586 }, { "clip_ratio/high_max": 0.0014493896469502943, "clip_ratio/high_mean": 0.0003946472652387456, "clip_ratio/low_mean": 0.000327332647430012, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007219799072117894, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 624.6842041015625, "completions/mean_terminated_length": 601.2820434570312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.485272674249052, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0147, "num_tokens": 341230130.0, "reward": 0.4877232313156128, "reward_std": 0.18952901661396027, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 587 }, { "clip_ratio/high_max": 0.0013787154102828936, "clip_ratio/high_mean": 0.0004109978194719588, "clip_ratio/low_mean": 0.0003107044553871674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007217022816803365, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3268.0, "completions/mean_length": 653.9553833007812, "completions/mean_terminated_length": 567.3134765625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 5.494604841061534, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 341823634.0, "reward": 0.5212053656578064, "reward_std": 0.22240857779979706, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 588 }, { "clip_ratio/high_max": 0.0016527361967746401, "clip_ratio/high_mean": 0.000604953545916942, "clip_ratio/low_mean": 0.0003452273822404095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009501809254288673, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3403.0, "completions/mean_length": 618.625, "completions/mean_terminated_length": 559.4188842773438, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 5.503937007874016, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 342402138.0, "reward": 0.5736607313156128, "reward_std": 0.2408929020166397, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 589 }, { "clip_ratio/high_max": 0.0019028083952434827, "clip_ratio/high_mean": 0.0005872474728221277, "clip_ratio/low_mean": 0.00035215872594562825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009394062053615926, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2184.0, "completions/mean_length": 544.7734375, "completions/mean_terminated_length": 508.74066162109375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 5.5132691746864975, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0233, "num_tokens": 342937111.0, "reward": 0.5703125, "reward_std": 0.2225145399570465, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 590 }, { "clip_ratio/high_max": 0.0014833757168162265, "clip_ratio/high_mean": 0.000418451493487737, "clip_ratio/low_mean": 0.00046329974998116086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008817512316454668, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 645.1038208007812, "completions/mean_terminated_length": 562.2822875976562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 5.522601341498979, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 343525188.0, "reward": 0.527901828289032, "reward_std": 0.22229093313217163, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 591 }, { "clip_ratio/high_max": 0.0015365254057542188, "clip_ratio/high_mean": 0.0004624969318456351, "clip_ratio/low_mean": 0.00023578075342811644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006982776862969331, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 600.8873291015625, "completions/mean_terminated_length": 557.4451904296875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 5.531933508311461, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 344119759.0, "reward": 0.5390625, "reward_std": 0.20512789487838745, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 592 }, { "clip_ratio/high_max": 0.0016532313657080522, "clip_ratio/high_mean": 0.0005328238598849566, "clip_ratio/low_mean": 0.000368635616496249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009014594506879803, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2424.0, "completions/mean_length": 565.2109375, "completions/mean_terminated_length": 537.4094848632812, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 5.541265675123943, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 344693836.0, "reward": 0.578125, "reward_std": 0.2285715937614441, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 593 }, { "clip_ratio/high_max": 0.0016362896258215187, "clip_ratio/high_mean": 0.0004772021652570402, "clip_ratio/low_mean": 0.00041457257748334087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008917747445593704, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 684.950927734375, "completions/mean_terminated_length": 607.0730590820312, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 5.550597841936424, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 345310208.0, "reward": 0.520089328289032, "reward_std": 0.2350693941116333, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 594 }, { "clip_ratio/high_max": 0.0014269785442593275, "clip_ratio/high_mean": 0.00038235130114117055, "clip_ratio/low_mean": 0.0004296153347240761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008119666417769622, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3296.0, "completions/mean_length": 641.1953125, "completions/mean_terminated_length": 574.3788452148438, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 5.559930008748906, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 345922127.0, "reward": 0.4665178656578064, "reward_std": 0.22887356579303741, "rewards/verify_math_reward/mean": 0.4665178656578064, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 595 }, { "clip_ratio/high_max": 0.0016614030055279727, "clip_ratio/high_mean": 0.0005471608137668227, "clip_ratio/low_mean": 0.00035743593355164194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000904596743566799, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 617.5, "completions/mean_terminated_length": 566.2876586914062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.569262175561388, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 346497959.0, "reward": 0.535714328289032, "reward_std": 0.23427662253379822, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 596 }, { "clip_ratio/high_max": 0.0013901961174269672, "clip_ratio/high_mean": 0.00041155310964313685, "clip_ratio/low_mean": 0.00027649343553548533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006880465534777613, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3500.0, "completions/mean_length": 603.739990234375, "completions/mean_terminated_length": 544.2803955078125, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 5.57859434237387, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 347055054.0, "reward": 0.5848214626312256, "reward_std": 0.17507019639015198, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 597 }, { "clip_ratio/high_max": 0.0017493439572717762, "clip_ratio/high_mean": 0.0005472518530496018, "clip_ratio/low_mean": 0.00026612177282459015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008133736073432374, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 640.8136596679688, "completions/mean_terminated_length": 565.9578247070312, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 5.587926509186351, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 347635911.0, "reward": 0.5178571939468384, "reward_std": 0.21083195507526398, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 598 }, { "clip_ratio/high_max": 0.0014640272147516953, "clip_ratio/high_mean": 0.00047749230498084216, "clip_ratio/low_mean": 0.0003387007839137368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008161930854839738, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3855.0, "completions/mean_length": 659.5424194335938, "completions/mean_terminated_length": 577.0674438476562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.597258675998834, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 348232437.0, "reward": 0.4877232313156128, "reward_std": 0.2295454740524292, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 599 }, { "clip_ratio/high_max": 0.0017931208994923509, "clip_ratio/high_mean": 0.000473861554951327, "clip_ratio/low_mean": 0.000331737685655753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008055992452682403, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 675.8627319335938, "completions/mean_terminated_length": 577.6957397460938, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 5.606590842811316, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 348822922.0, "reward": 0.5323660969734192, "reward_std": 0.21421900391578674, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 600 }, { "clip_ratio/high_max": 0.0016256882372545078, "clip_ratio/high_mean": 0.000549583998918024, "clip_ratio/low_mean": 0.0004123904088828567, "clip_ratio/low_min": 1.0665528861864004e-05, "clip_ratio/region_mean": 0.0009619744059818913, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 584.552490234375, "completions/mean_terminated_length": 560.8797607421875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 5.615923009623797, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 349410545.0, "reward": 0.5602678656578064, "reward_std": 0.24968752264976501, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 601 }, { "clip_ratio/high_max": 0.0013063722926744958, "clip_ratio/high_mean": 0.0003865236153615115, "clip_ratio/low_mean": 0.00041439876008553256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008009223861336068, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 579.1998291015625, "completions/mean_terminated_length": 551.5084838867188, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 5.625255176436279, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 349980436.0, "reward": 0.5625, "reward_std": 0.23258204758167267, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 602 }, { "clip_ratio/high_max": 0.0016484307761857053, "clip_ratio/high_mean": 0.000552778579731239, "clip_ratio/low_mean": 0.00043712548176699784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009899040687741945, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3108.0, "completions/mean_length": 615.982177734375, "completions/mean_terminated_length": 564.7474365234375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 5.634587343248761, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 350573884.0, "reward": 0.5703125, "reward_std": 0.2594209611415863, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 603 }, { "clip_ratio/high_max": 0.0018310111427126685, "clip_ratio/high_mean": 0.0005517259573935007, "clip_ratio/low_mean": 0.00037904168902969104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009307676305070345, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 637.4955444335938, "completions/mean_terminated_length": 598.4605102539062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 5.6439195100612425, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 351190560.0, "reward": 0.5959821939468384, "reward_std": 0.22905904054641724, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 604 }, { "clip_ratio/high_max": 0.001524799096841889, "clip_ratio/high_mean": 0.00046769564380610973, "clip_ratio/low_mean": 0.00047231378539436264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009400094295415329, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 584.9642944335938, "completions/mean_terminated_length": 545.3363647460938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 5.653251676873724, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 351763216.0, "reward": 0.5691964626312256, "reward_std": 0.22330942749977112, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 605 }, { "clip_ratio/high_max": 0.0016735976951167686, "clip_ratio/high_mean": 0.0004567844382563635, "clip_ratio/low_mean": 0.0003422894055802317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007990738413354848, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 585.1395263671875, "completions/mean_terminated_length": 545.5135498046875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 5.662583843686206, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 352333725.0, "reward": 0.559151828289032, "reward_std": 0.23142297565937042, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 606 }, { "clip_ratio/high_max": 0.0016542480852876906, "clip_ratio/high_mean": 0.0005613918172002741, "clip_ratio/low_mean": 0.00032017077523960324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008815625860734144, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 611.34375, "completions/mean_terminated_length": 560.040771484375, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 5.671916010498688, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0168, "num_tokens": 352915033.0, "reward": 0.6116071939468384, "reward_std": 0.20793946087360382, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 607 }, { "clip_ratio/high_max": 0.001837543752117199, "clip_ratio/high_mean": 0.0005368785557493538, "clip_ratio/low_mean": 0.00026023138480013586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007971099412316107, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 548.4944458007812, "completions/mean_terminated_length": 512.4993896484375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 5.681248177311169, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 353463684.0, "reward": 0.6026785969734192, "reward_std": 0.18971771001815796, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 608 }, { "clip_ratio/high_max": 0.0018288902429048903, "clip_ratio/high_mean": 0.0005793526834168006, "clip_ratio/low_mean": 0.0003307258682525571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009100785482587526, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 633.583740234375, "completions/mean_terminated_length": 578.6246948242188, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 5.690580344123651, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 354063415.0, "reward": 0.5256696939468384, "reward_std": 0.22857119143009186, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 609 }, { "clip_ratio/high_max": 0.0015159009162744042, "clip_ratio/high_mean": 0.00041560329555068165, "clip_ratio/low_mean": 0.00027500016324211174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006906034586791066, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 538.8772583007812, "completions/mean_terminated_length": 514.8966064453125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.699912510936133, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0176, "num_tokens": 354613161.0, "reward": 0.5970982313156128, "reward_std": 0.17784711718559265, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 610 }, { "clip_ratio/high_max": 0.001717625229503028, "clip_ratio/high_mean": 0.0004708107526312233, "clip_ratio/low_mean": 0.000301633516187394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007724442730250303, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 582.0267944335938, "completions/mean_terminated_length": 554.3577270507812, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 5.7092446777486145, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 355201385.0, "reward": 0.5446428656578064, "reward_std": 0.21463268995285034, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 611 }, { "clip_ratio/high_max": 0.0018678093802009244, "clip_ratio/high_mean": 0.000609123405297396, "clip_ratio/low_mean": 0.00041631638282524364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010254397884637, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 614.341552734375, "completions/mean_terminated_length": 567.0792236328125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 5.718576844561096, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 355802331.0, "reward": 0.5725446939468384, "reward_std": 0.24446289241313934, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 612 }, { "clip_ratio/high_max": 0.0015789622739248443, "clip_ratio/high_mean": 0.0004802795135674387, "clip_ratio/low_mean": 0.0002489008927568648, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007291804026863247, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 614.5592041015625, "completions/mean_terminated_length": 535.0741577148438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.727909011373578, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 356360712.0, "reward": 0.551339328289032, "reward_std": 0.18490806221961975, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 613 }, { "clip_ratio/high_max": 0.0014210480057954555, "clip_ratio/high_mean": 0.00043928807838256034, "clip_ratio/low_mean": 0.00035182789724785835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007911159791547107, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 598.2232666015625, "completions/mean_terminated_length": 542.7029418945312, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 5.73724117818606, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 356933112.0, "reward": 0.5613839626312256, "reward_std": 0.2124505490064621, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 614 }, { "clip_ratio/high_max": 0.001356075529656664, "clip_ratio/high_mean": 0.0004500520685724041, "clip_ratio/low_mean": 0.00033761252723252255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007876646013755817, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 604.6920166015625, "completions/mean_terminated_length": 557.2986450195312, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 5.746573344998541, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0154, "num_tokens": 357514260.0, "reward": 0.5133928656578064, "reward_std": 0.21937061846256256, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 615 }, { "clip_ratio/high_max": 0.0020445919399207924, "clip_ratio/high_mean": 0.0006977882353567111, "clip_ratio/low_mean": 0.00032132818421359843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010191164237767225, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 628.6395263671875, "completions/mean_terminated_length": 557.5546875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 5.755905511811024, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0185, "num_tokens": 358096625.0, "reward": 0.5725446939468384, "reward_std": 0.253483384847641, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 616 }, { "clip_ratio/high_max": 0.0017153845365101006, "clip_ratio/high_mean": 0.0005488036445058242, "clip_ratio/low_mean": 0.0003400926821086614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008888963166100439, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3174.0, "completions/mean_length": 594.0435791015625, "completions/mean_terminated_length": 542.48583984375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 5.765237678623506, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 358661976.0, "reward": 0.5580357313156128, "reward_std": 0.2055736631155014, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 617 }, { "clip_ratio/high_max": 0.0015714221190137323, "clip_ratio/high_mean": 0.00046793652518317685, "clip_ratio/low_mean": 0.0002259613844444175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006938979076949181, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 653.9152221679688, "completions/mean_terminated_length": 611.1322021484375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.7745698454359875, "grad_norm": 0.1123046875, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 359287116.0, "reward": 0.5915178656578064, "reward_std": 0.2069612443447113, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 618 }, { "clip_ratio/high_max": 0.001240374596818583, "clip_ratio/high_mean": 0.00037524275262512674, "clip_ratio/low_mean": 0.00035552919950987416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007307719456548512, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 627.8359375, "completions/mean_terminated_length": 552.698974609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 5.783902012248469, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 359861337.0, "reward": 0.5491071939468384, "reward_std": 0.19418711960315704, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 619 }, { "clip_ratio/high_max": 0.0017908200443343958, "clip_ratio/high_mean": 0.0005974826347028284, "clip_ratio/low_mean": 0.0002967977391108434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008942803697209456, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2140.0, "completions/mean_length": 549.5892944335938, "completions/mean_terminated_length": 521.664794921875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 5.793234179060951, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 360421977.0, "reward": 0.5479910969734192, "reward_std": 0.25998368859291077, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 620 }, { "clip_ratio/high_max": 0.0013891612707084278, "clip_ratio/high_mean": 0.00039783442730367824, "clip_ratio/low_mean": 0.00032748142780292255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007253158555613481, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 599.9017944335938, "completions/mean_terminated_length": 548.4303588867188, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 5.802566345873433, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 360993321.0, "reward": 0.5524553656578064, "reward_std": 0.19531185925006866, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 621 }, { "clip_ratio/high_max": 0.0020891256326649454, "clip_ratio/high_mean": 0.0006852979413451976, "clip_ratio/low_mean": 0.00036986430779961665, "clip_ratio/low_min": 9.530344868835527e-06, "clip_ratio/region_mean": 0.001055162253578601, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 619.2455444335938, "completions/mean_terminated_length": 568.0588989257812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 5.811898512685914, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 361579125.0, "reward": 0.578125, "reward_std": 0.24014613032341003, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 622 }, { "clip_ratio/high_max": 0.00170293934570509, "clip_ratio/high_mean": 0.0005552408042603929, "clip_ratio/low_mean": 0.000346487800925388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009017286201924435, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 642.864990234375, "completions/mean_terminated_length": 588.0532836914062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 5.821230679498396, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 362198188.0, "reward": 0.535714328289032, "reward_std": 0.22628137469291687, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 623 }, { "clip_ratio/high_max": 0.001384750374199939, "clip_ratio/high_mean": 0.00036116401918206975, "clip_ratio/low_mean": 0.00028290550721976615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006440695348146619, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 637.4308471679688, "completions/mean_terminated_length": 582.5328979492188, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 5.830562846310878, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 362813758.0, "reward": 0.4877232313156128, "reward_std": 0.17318309843540192, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.5001283884048462, "step": 624 }, { "clip_ratio/high_max": 0.002005233105592197, "clip_ratio/high_mean": 0.0005235718522271782, "clip_ratio/low_mean": 0.00034254040178893774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008661122628836893, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3183.0, "completions/mean_length": 582.4855346679688, "completions/mean_terminated_length": 538.814697265625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 5.83989501312336, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 363370241.0, "reward": 0.6127232313156128, "reward_std": 0.21958746016025543, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 625 }, { "clip_ratio/high_max": 0.001632858402444981, "clip_ratio/high_mean": 0.00048267613510688534, "clip_ratio/low_mean": 0.00028833066107836203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007710067802690901, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 576.6707763671875, "completions/mean_terminated_length": 524.8572998046875, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 5.849227179935841, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 363913666.0, "reward": 0.5647321939468384, "reward_std": 0.1759282499551773, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 626 }, { "clip_ratio/high_max": 0.0016306336801790167, "clip_ratio/high_mean": 0.0004919825648812548, "clip_ratio/low_mean": 0.00039717310869491484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008891556699381908, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 584.0546875, "completions/mean_terminated_length": 544.41650390625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 5.858559346748323, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 364490979.0, "reward": 0.5691964626312256, "reward_std": 0.2117016762495041, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 627 }, { "clip_ratio/high_max": 0.0017665231662249425, "clip_ratio/high_mean": 0.0005111080458846118, "clip_ratio/low_mean": 0.00038048836950110854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008915963871913846, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 620.357177734375, "completions/mean_terminated_length": 565.188232421875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 5.867891513560805, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 365069779.0, "reward": 0.5401785969734192, "reward_std": 0.22349488735198975, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 628 }, { "clip_ratio/high_max": 0.0014437277868637466, "clip_ratio/high_mean": 0.0004319528011365037, "clip_ratio/low_mean": 0.00029387151062110206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007258243049363955, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 607.8995971679688, "completions/mean_terminated_length": 556.5458374023438, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 5.8772236803732865, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 365659329.0, "reward": 0.5256696939468384, "reward_std": 0.22446875274181366, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 629 }, { "clip_ratio/high_max": 0.0016678804149705684, "clip_ratio/high_mean": 0.0005439781342602146, "clip_ratio/low_mean": 0.0003042860504365308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008482641760565457, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2244.0, "completions/mean_length": 582.1317138671875, "completions/mean_terminated_length": 538.4564819335938, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 5.886555847185768, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 366227895.0, "reward": 0.5959821939468384, "reward_std": 0.23059743642807007, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 630 }, { "clip_ratio/high_max": 0.00126131130491558, "clip_ratio/high_mean": 0.0003636357573668647, "clip_ratio/low_mean": 0.00033631018482083164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006999459496910276, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 559.3917846679688, "completions/mean_terminated_length": 531.54443359375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 5.89588801399825, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 366788126.0, "reward": 0.5837053656578064, "reward_std": 0.19772110879421234, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 631 }, { "clip_ratio/high_max": 0.0018229607449029572, "clip_ratio/high_mean": 0.0005960020198472193, "clip_ratio/low_mean": 0.0004161727232485646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010121747327502817, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 617.622802734375, "completions/mean_terminated_length": 578.3634643554688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 5.905220180810732, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 367397508.0, "reward": 0.4955357313156128, "reward_std": 0.2520148754119873, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 632 }, { "clip_ratio/high_max": 0.0017330375358142192, "clip_ratio/high_mean": 0.0004954539767823007, "clip_ratio/low_mean": 0.00033041706001313287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008258710395239177, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 583.7489013671875, "completions/mean_terminated_length": 544.1072387695312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 5.914552347623214, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 367960595.0, "reward": 0.5703125, "reward_std": 0.1996813714504242, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 633 }, { "clip_ratio/high_max": 0.0013183061364543391, "clip_ratio/high_mean": 0.0004277631906006718, "clip_ratio/low_mean": 0.0003647073425554481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000792470529631828, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 623.9498291015625, "completions/mean_terminated_length": 560.8215942382812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.923884514435695, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 368546358.0, "reward": 0.5703125, "reward_std": 0.23559045791625977, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 634 }, { "clip_ratio/high_max": 0.0013197403495723847, "clip_ratio/high_mean": 0.0004318313540352392, "clip_ratio/low_mean": 0.0004125800021483883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00084441136004898, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 639.3471069335938, "completions/mean_terminated_length": 572.494873046875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 5.933216681248178, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 369146981.0, "reward": 0.5111607313156128, "reward_std": 0.23041337728500366, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 635 }, { "clip_ratio/high_max": 0.0015313922449422535, "clip_ratio/high_mean": 0.00042677192163864675, "clip_ratio/low_mean": 0.0003478974579138594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000774669376824022, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 613.5670166015625, "completions/mean_terminated_length": 574.2618408203125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 5.942548848060659, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 369742897.0, "reward": 0.5569196939468384, "reward_std": 0.19877348840236664, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 636 }, { "clip_ratio/high_max": 0.001322434779467585, "clip_ratio/high_mean": 0.00038604866460900666, "clip_ratio/low_mean": 0.00029808235240125214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006841310159870773, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3976.0, "completions/mean_length": 601.7310791015625, "completions/mean_terminated_length": 554.2975463867188, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 5.951881014873141, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 370328624.0, "reward": 0.520089328289032, "reward_std": 0.1770893931388855, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 637 }, { "clip_ratio/high_max": 0.0017185953838634305, "clip_ratio/high_mean": 0.0004891297135145578, "clip_ratio/low_mean": 0.0003073795163572868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007965092245285632, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3333.0, "completions/mean_length": 686.513427734375, "completions/mean_terminated_length": 608.6712036132812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 5.961213181685623, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 370957556.0, "reward": 0.5267857313156128, "reward_std": 0.2003137171268463, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 638 }, { "clip_ratio/high_max": 0.001580068470502738, "clip_ratio/high_mean": 0.0004835813867885008, "clip_ratio/low_mean": 0.00032849010995050776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008120714919641614, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 575.1138916015625, "completions/mean_terminated_length": 535.374755859375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 5.970545348498105, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0186, "num_tokens": 371523114.0, "reward": 0.546875, "reward_std": 0.20847007632255554, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 639 }, { "clip_ratio/high_max": 0.0014115801695879782, "clip_ratio/high_mean": 0.0003921603085927927, "clip_ratio/low_mean": 0.00027514910800618964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006673094191000928, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 570.0580444335938, "completions/mean_terminated_length": 518.147216796875, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 5.979877515310586, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 372060878.0, "reward": 0.5948660969734192, "reward_std": 0.14884108304977417, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 640 }, { "clip_ratio/high_max": 0.001312239854996733, "clip_ratio/high_mean": 0.0003718040077274054, "clip_ratio/low_mean": 0.0003047645578817537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006765685648133513, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3832.0, "completions/mean_length": 659.7600708007812, "completions/mean_terminated_length": 577.290283203125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.989209682123068, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 372665975.0, "reward": 0.4888392984867096, "reward_std": 0.18994270265102386, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 641 }, { "clip_ratio/high_max": 0.0014908608982295846, "clip_ratio/high_mean": 0.0004632329918194955, "clip_ratio/low_mean": 0.0003131747744191671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000776407773628307, "completions/clipped_ratio": 0.014204545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 2240.0, "completions/mean_length": 628.64208984375, "completions/mean_terminated_length": 578.6801147460938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 5.99854184893555, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 373226323.0, "reward": 0.5881696939468384, "reward_std": 0.17995189130306244, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 642 }, { "clip_ratio/high_max": 0.00173607175838697, "clip_ratio/high_mean": 0.0005547704724904179, "clip_ratio/low_mean": 0.00038455201729448163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009393225045641884, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 620.4564819335938, "completions/mean_terminated_length": 573.2771606445312, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 6.009332166812482, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 373838044.0, "reward": 0.5424107313156128, "reward_std": 0.23357053101062775, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 643 }, { "clip_ratio/high_max": 0.001706956718408037, "clip_ratio/high_mean": 0.000544264402378758, "clip_ratio/low_mean": 0.0003396990161945723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008839634210744407, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 547.1674194335938, "completions/mean_terminated_length": 519.223876953125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.0186643336249634, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 374377250.0, "reward": 0.6104910969734192, "reward_std": 0.22146859765052795, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 644 }, { "clip_ratio/high_max": 0.002222527713456657, "clip_ratio/high_mean": 0.0006332240118354093, "clip_ratio/low_mean": 0.000419498909423055, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010527229205763433, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 612.2745971679688, "completions/mean_terminated_length": 552.9603271484375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 6.027996500437445, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 374955936.0, "reward": 0.5412946939468384, "reward_std": 0.2486380934715271, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 645 }, { "clip_ratio/high_max": 0.0014041293006812339, "clip_ratio/high_mean": 0.000395850021050137, "clip_ratio/low_mean": 0.0002981900096301615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006940400326129748, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 620.0714721679688, "completions/mean_terminated_length": 572.8869018554688, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.037328667249927, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 375563200.0, "reward": 0.4910714626312256, "reward_std": 0.19425876438617706, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 646 }, { "clip_ratio/high_max": 0.0017485125017628889, "clip_ratio/high_mean": 0.0004991767368665023, "clip_ratio/low_mean": 0.00040243194121103443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009016086760311737, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 695.9922485351562, "completions/mean_terminated_length": 606.4158325195312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.046660834062409, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 376176937.0, "reward": 0.5066964626312256, "reward_std": 0.24983063340187073, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 647 }, { "clip_ratio/high_max": 0.0018755322616925696, "clip_ratio/high_mean": 0.0005498871350937407, "clip_ratio/low_mean": 0.00027450664151729143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008243937718361849, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3832.0, "completions/mean_length": 591.6350708007812, "completions/mean_terminated_length": 548.0780029296875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 6.05599300087489, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 376749522.0, "reward": 0.5703125, "reward_std": 0.19666872918605804, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 648 }, { "clip_ratio/high_max": 0.001407445083714265, "clip_ratio/high_mean": 0.0004722145440609893, "clip_ratio/low_mean": 0.00032295044161401165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000795164978626417, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 635.8616333007812, "completions/mean_terminated_length": 580.9387817382812, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.065325167687372, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 377349630.0, "reward": 0.5546875, "reward_std": 0.1961480975151062, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 649 }, { "clip_ratio/high_max": 0.0015356802682617854, "clip_ratio/high_mean": 0.0005339092857639116, "clip_ratio/low_mean": 0.00033801248764575575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008719217780708277, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 575.2623291015625, "completions/mean_terminated_length": 527.469482421875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 6.074657334499854, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 377907249.0, "reward": 0.5625, "reward_std": 0.21241775155067444, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 650 }, { "clip_ratio/high_max": 0.0015866073154029436, "clip_ratio/high_mean": 0.0005240302166384936, "clip_ratio/low_mean": 0.00036624932818085654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000890279525265214, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 552.8348388671875, "completions/mean_terminated_length": 520.9144287109375, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 6.083989501312336, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 378460813.0, "reward": 0.5345982313156128, "reward_std": 0.213243305683136, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 651 }, { "clip_ratio/high_max": 0.001572258031956153, "clip_ratio/high_mean": 0.0004430160465744848, "clip_ratio/low_mean": 0.0002523147768442868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006953308227366506, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3970.0, "completions/mean_length": 580.8504638671875, "completions/mean_terminated_length": 541.1760864257812, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 6.093321668124818, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 379028471.0, "reward": 0.5770089626312256, "reward_std": 0.19912005960941315, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 652 }, { "clip_ratio/high_max": 0.001971721856534714, "clip_ratio/high_mean": 0.0006360389720612147, "clip_ratio/low_mean": 0.0003380997884505632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009741387666508672, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 616.0111694335938, "completions/mean_terminated_length": 552.7386474609375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 6.1026538349373, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 379605673.0, "reward": 0.5625, "reward_std": 0.21752658486366272, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 653 }, { "clip_ratio/high_max": 0.001424068503183662, "clip_ratio/high_mean": 0.0004155589747369959, "clip_ratio/low_mean": 0.00036105752406001557, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007766165081193321, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 630.1295166015625, "completions/mean_terminated_length": 594.9627685546875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.111986001749782, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 380230621.0, "reward": 0.5078125, "reward_std": 0.23596911132335663, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 654 }, { "clip_ratio/high_max": 0.0014793456921324832, "clip_ratio/high_mean": 0.00037749966213596053, "clip_ratio/low_mean": 0.0003206056824183179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006981053547860938, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 577.046875, "completions/mean_terminated_length": 525.2389526367188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 6.121318168562263, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 380776551.0, "reward": 0.5613839626312256, "reward_std": 0.1774352639913559, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 655 }, { "clip_ratio/high_max": 0.001543802687592688, "clip_ratio/high_mean": 0.000473415589567594, "clip_ratio/low_mean": 0.00034356997582563054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008169855509549961, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 570.9788208007812, "completions/mean_terminated_length": 523.1278686523438, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 6.130650335374745, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 381316228.0, "reward": 0.6194196939468384, "reward_std": 0.2100103199481964, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 656 }, { "clip_ratio/high_max": 0.001508063778601354, "clip_ratio/high_mean": 0.0004405946004908401, "clip_ratio/low_mean": 0.0003125828362726679, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007531774517701706, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3474.0, "completions/mean_length": 593.1796875, "completions/mean_terminated_length": 565.5984497070312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.139982502187227, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0225, "num_tokens": 381904045.0, "reward": 0.5814732313156128, "reward_std": 0.22052742540836334, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 657 }, { "clip_ratio/high_max": 0.0016387092109653167, "clip_ratio/high_mean": 0.0004399012011617742, "clip_ratio/low_mean": 0.00039467282090299705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008345740106960875, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 613.1127319335938, "completions/mean_terminated_length": 549.7874755859375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 6.1493146689997085, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 382481666.0, "reward": 0.5502232313156128, "reward_std": 0.21339528262615204, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 658 }, { "clip_ratio/high_max": 0.0015169033158599632, "clip_ratio/high_mean": 0.0004784027078130748, "clip_ratio/low_mean": 0.00028230874386281357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007607114562233619, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 544.4375, "completions/mean_terminated_length": 516.4724731445312, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 6.15864683581219, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 383025882.0, "reward": 0.6328125, "reward_std": 0.1977197229862213, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 659 }, { "clip_ratio/high_max": 0.0015689903948441497, "clip_ratio/high_mean": 0.000494471459091983, "clip_ratio/low_mean": 0.000269647275558782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007641187344233913, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 612.4654541015625, "completions/mean_terminated_length": 565.1776123046875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 6.167979002624672, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 383612043.0, "reward": 0.582589328289032, "reward_std": 0.19389037787914276, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 660 }, { "clip_ratio/high_max": 0.0018581387485028245, "clip_ratio/high_mean": 0.000576645387127428, "clip_ratio/low_mean": 0.00036334505170998455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009399904201927711, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 675.46875, "completions/mean_terminated_length": 601.3637084960938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 6.177311169437154, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 384241719.0, "reward": 0.4508928656578064, "reward_std": 0.23011252284049988, "rewards/verify_math_reward/mean": 0.4508928656578064, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 661 }, { "clip_ratio/high_max": 0.0015810354198038112, "clip_ratio/high_mean": 0.0004957555406690517, "clip_ratio/low_mean": 0.0002787279472613591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007744834929326316, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3741.0, "completions/mean_length": 588.1685791015625, "completions/mean_terminated_length": 540.5509033203125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 6.186643336249635, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 384802494.0, "reward": 0.5725446939468384, "reward_std": 0.20914226770401, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 662 }, { "clip_ratio/high_max": 0.0020061723298567813, "clip_ratio/high_mean": 0.0006206578104865912, "clip_ratio/low_mean": 0.00029738227794950944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009180401002595318, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 572.9989013671875, "completions/mean_terminated_length": 525.1753540039062, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 6.195975503062117, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0183, "num_tokens": 385360165.0, "reward": 0.613839328289032, "reward_std": 0.22120577096939087, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 663 }, { "clip_ratio/high_max": 0.0014628641565650469, "clip_ratio/high_mean": 0.0004099119246347982, "clip_ratio/low_mean": 0.00031930648310662946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007292184095604171, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1950.0, "completions/mean_length": 592.3471069335938, "completions/mean_terminated_length": 536.7335815429688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 6.205307669874599, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 385914692.0, "reward": 0.5390625, "reward_std": 0.1865277737379074, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 664 }, { "clip_ratio/high_max": 0.0013592599443654763, "clip_ratio/high_mean": 0.0004112145575163595, "clip_ratio/low_mean": 0.0002543166864370505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006655312358816445, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 626.7824096679688, "completions/mean_terminated_length": 563.7056884765625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 6.2146398366870805, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 386509633.0, "reward": 0.4743303656578064, "reward_std": 0.1748419553041458, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 665 }, { "clip_ratio/high_max": 0.0016358569173462456, "clip_ratio/high_mean": 0.0004455278162822651, "clip_ratio/low_mean": 0.0003062796452013572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007518074326071655, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 543.4832763671875, "completions/mean_terminated_length": 523.5477294921875, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 6.223972003499562, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 387054490.0, "reward": 0.5926339626312256, "reward_std": 0.19133350253105164, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 666 }, { "clip_ratio/high_max": 0.0012536881449705106, "clip_ratio/high_mean": 0.0003850666768130395, "clip_ratio/low_mean": 0.00037460315843418357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007596698401357571, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 630.3236694335938, "completions/mean_terminated_length": 595.158935546875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.233304170312044, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 387664516.0, "reward": 0.5680803656578064, "reward_std": 0.2067355215549469, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 667 }, { "clip_ratio/high_max": 0.001667837970671826, "clip_ratio/high_mean": 0.0004726103929897363, "clip_ratio/low_mean": 0.00034267061596438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008152810041792691, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3206.0, "completions/mean_length": 637.919677734375, "completions/mean_terminated_length": 579.0420532226562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 6.242636337124526, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 388270436.0, "reward": 0.5, "reward_std": 0.23582643270492554, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 668 }, { "clip_ratio/high_max": 0.0011765300996557926, "clip_ratio/high_mean": 0.000360934918262501, "clip_ratio/low_mean": 0.0002547097612932703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006156446756904188, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3207.0, "completions/mean_length": 619.7176513671875, "completions/mean_terminated_length": 580.48193359375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 6.251968503937007, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.012, "num_tokens": 388876095.0, "reward": 0.5267857313156128, "reward_std": 0.18869741261005402, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 669 }, { "clip_ratio/high_max": 0.0016365329174732324, "clip_ratio/high_mean": 0.00046390640022764273, "clip_ratio/low_mean": 0.00034075080952788994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008046572138482588, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 610.5234375, "completions/mean_terminated_length": 543.11376953125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.26130067074949, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 389443724.0, "reward": 0.5993303656578064, "reward_std": 0.18336893618106842, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 670 }, { "clip_ratio/high_max": 0.0020880389129160903, "clip_ratio/high_mean": 0.0006353806134029583, "clip_ratio/low_mean": 0.0003999091197783855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010352897297707386, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 580.3303833007812, "completions/mean_terminated_length": 524.5260620117188, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 6.270632837561972, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 389995396.0, "reward": 0.5803571939468384, "reward_std": 0.24536260962486267, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 671 }, { "clip_ratio/high_max": 0.0013693723931282875, "clip_ratio/high_mean": 0.0004878874083260598, "clip_ratio/low_mean": 0.0002530723661493539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000740959772883798, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3128.0, "completions/mean_length": 623.4129638671875, "completions/mean_terminated_length": 552.2210083007812, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 6.2799650043744535, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 390574550.0, "reward": 0.574776828289032, "reward_std": 0.20805639028549194, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 672 }, { "clip_ratio/high_max": 0.0016816371062304825, "clip_ratio/high_mean": 0.0004886971887572145, "clip_ratio/low_mean": 0.0003883276204987851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008770248023211025, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3445.0, "completions/mean_length": 585.2176513671875, "completions/mean_terminated_length": 537.5599975585938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 6.289297171186935, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 391149481.0, "reward": 0.543526828289032, "reward_std": 0.23563112318515778, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838003516197205, "step": 673 }, { "clip_ratio/high_max": 0.001866939315732452, "clip_ratio/high_mean": 0.0006465037920406758, "clip_ratio/low_mean": 0.0003531074702323167, "clip_ratio/low_min": 1.5024038475530688e-05, "clip_ratio/region_mean": 0.0009996112567023374, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3129.0, "completions/mean_length": 617.8192138671875, "completions/mean_terminated_length": 570.6041259765625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 6.298629337999417, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 391740735.0, "reward": 0.5569196939468384, "reward_std": 0.24319295585155487, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 674 }, { "clip_ratio/high_max": 0.001925237715113326, "clip_ratio/high_mean": 0.0005865288562745263, "clip_ratio/low_mean": 0.0003072172133897766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008937460552260745, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 651.921875, "completions/mean_terminated_length": 577.3067016601562, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 6.307961504811899, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 392322777.0, "reward": 0.5546875, "reward_std": 0.21854645013809204, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 675 }, { "clip_ratio/high_max": 0.00126882118274807, "clip_ratio/high_mean": 0.00040721619598116376, "clip_ratio/low_mean": 0.0003446972309575358, "clip_ratio/low_min": 5.852059985045344e-06, "clip_ratio/region_mean": 0.000751913428757689, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 663.4620971679688, "completions/mean_terminated_length": 608.9773559570312, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 6.31729367162438, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 392946999.0, "reward": 0.543526828289032, "reward_std": 0.22233189642429352, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 676 }, { "clip_ratio/high_max": 0.0015275381419996847, "clip_ratio/high_mean": 0.00044322943040242535, "clip_ratio/low_mean": 0.0003800341171427135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008232635477725125, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 614.3080444335938, "completions/mean_terminated_length": 542.9293823242188, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 6.326625838436862, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 393515051.0, "reward": 0.5558035969734192, "reward_std": 0.20718877017498016, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 677 }, { "clip_ratio/high_max": 0.001644826803385513, "clip_ratio/high_mean": 0.0005085031702947163, "clip_ratio/low_mean": 0.00031327084036547603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008217740023610531, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 603.734375, "completions/mean_terminated_length": 548.3015747070312, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 6.335958005249344, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 394101701.0, "reward": 0.5412946939468384, "reward_std": 0.21568436920642853, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 678 }, { "clip_ratio/high_max": 0.0017491900125605753, "clip_ratio/high_mean": 0.0005638512056975742, "clip_ratio/low_mean": 0.00034446125062004285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009083124632525141, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3163.0, "completions/mean_length": 609.7890625, "completions/mean_terminated_length": 546.4033813476562, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 6.3452901720618256, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 394671032.0, "reward": 0.566964328289032, "reward_std": 0.22681200504302979, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 679 }, { "clip_ratio/high_max": 0.0015627999573553097, "clip_ratio/high_mean": 0.0004745071148590796, "clip_ratio/low_mean": 0.00038098836557765026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008554954756618827, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 631.9799194335938, "completions/mean_terminated_length": 584.95703125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.354622338874307, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 395273998.0, "reward": 0.5446428656578064, "reward_std": 0.22552183270454407, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 680 }, { "clip_ratio/high_max": 0.001402102237989311, "clip_ratio/high_mean": 0.0003810048119703424, "clip_ratio/low_mean": 0.000357528841277599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00073853366120602, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3522.0, "completions/mean_length": 663.310302734375, "completions/mean_terminated_length": 604.8649291992188, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.363954505686789, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 395897684.0, "reward": 0.5323660969734192, "reward_std": 0.22638945281505585, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 681 }, { "clip_ratio/high_max": 0.0015754902833577944, "clip_ratio/high_mean": 0.00041263157504545234, "clip_ratio/low_mean": 0.00030170024456310784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007143318134694709, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 615.755615234375, "completions/mean_terminated_length": 560.5136108398438, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 6.373286672499271, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 396487401.0, "reward": 0.5323660969734192, "reward_std": 0.20294009149074554, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 682 }, { "clip_ratio/high_max": 0.0015777024700582842, "clip_ratio/high_mean": 0.00047866311706457054, "clip_ratio/low_mean": 0.00033791923738135665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008165823555827956, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1780.0, "completions/mean_length": 584.7924194335938, "completions/mean_terminated_length": 537.1289672851562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 6.3826188393117524, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 397054823.0, "reward": 0.6015625, "reward_std": 0.20910878479480743, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 683 }, { "clip_ratio/high_max": 0.0014945502534828847, "clip_ratio/high_mean": 0.0004472033600677605, "clip_ratio/low_mean": 0.00036544585134379304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008126492184601375, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 652.505615234375, "completions/mean_terminated_length": 573.886962890625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 6.391951006124234, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 397647868.0, "reward": 0.4754464626312256, "reward_std": 0.21797555685043335, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 684 }, { "clip_ratio/high_max": 0.0017288713715970516, "clip_ratio/high_mean": 0.0006311056781669322, "clip_ratio/low_mean": 0.00029005787143887574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009211635442625266, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 588.9163208007812, "completions/mean_terminated_length": 545.325439453125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.401283172936716, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0262, "num_tokens": 398222129.0, "reward": 0.6205357313156128, "reward_std": 0.247055783867836, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 685 }, { "clip_ratio/high_max": 0.0014622002709074877, "clip_ratio/high_mean": 0.0004083467861164536, "clip_ratio/low_mean": 0.0002982504461215285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007065972254167718, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3721.0, "completions/mean_length": 606.5647583007812, "completions/mean_terminated_length": 551.1768798828125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 6.410615339749198, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 398795771.0, "reward": 0.5602678656578064, "reward_std": 0.19272036850452423, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 686 }, { "clip_ratio/high_max": 0.001748134076478891, "clip_ratio/high_mean": 0.0005480914528561698, "clip_ratio/low_mean": 0.0004106713050759936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009587627728251391, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 651.4989013671875, "completions/mean_terminated_length": 584.8816528320312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.41994750656168, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 399394314.0, "reward": 0.5267857313156128, "reward_std": 0.25216320157051086, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 687 }, { "clip_ratio/high_max": 0.0015942801092023728, "clip_ratio/high_mean": 0.0005259980257505958, "clip_ratio/low_mean": 0.00030949494271226285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008354929623237695, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 665.8549194335938, "completions/mean_terminated_length": 583.5314331054688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.429279673374162, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0156, "num_tokens": 399992456.0, "reward": 0.5245535969734192, "reward_std": 0.22687868773937225, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 688 }, { "clip_ratio/high_max": 0.001243736903234094, "clip_ratio/high_mean": 0.0003723399368027458, "clip_ratio/low_mean": 0.00023856667121435748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00061090661029084, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 636.7935791015625, "completions/mean_terminated_length": 589.8359985351562, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 6.438611840186644, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 400604735.0, "reward": 0.5267857313156128, "reward_std": 0.1880679875612259, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 689 }, { "clip_ratio/high_max": 0.0018221590062239557, "clip_ratio/high_mean": 0.0005158418440487367, "clip_ratio/low_mean": 0.0003384311767149484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008542730292901979, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 620.3158569335938, "completions/mean_terminated_length": 565.146240234375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.447944006999125, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 401188026.0, "reward": 0.5680803656578064, "reward_std": 0.21542111039161682, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 690 }, { "clip_ratio/high_max": 0.0018276918799529085, "clip_ratio/high_mean": 0.0005538284267458948, "clip_ratio/low_mean": 0.0004197250436845934, "clip_ratio/low_min": 1.637840614421293e-05, "clip_ratio/region_mean": 0.0009735534704304882, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 654.6875, "completions/mean_terminated_length": 592.1181640625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 6.457276173811607, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 401797562.0, "reward": 0.5323660969734192, "reward_std": 0.2566012442111969, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 691 }, { "clip_ratio/high_max": 0.00205546538472845, "clip_ratio/high_mean": 0.000640464154912479, "clip_ratio/low_mean": 0.0003406134549095441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009810775991354603, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3196.0, "completions/mean_length": 571.5435791015625, "completions/mean_terminated_length": 539.7916870117188, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 6.466608340624089, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 402367185.0, "reward": 0.5479910969734192, "reward_std": 0.23251745104789734, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 692 }, { "clip_ratio/high_max": 0.0014482236711046426, "clip_ratio/high_mean": 0.0004982175551049295, "clip_ratio/low_mean": 0.0003383869577646692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008366045176444459, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 623.8058471679688, "completions/mean_terminated_length": 556.6530151367188, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 6.475940507436571, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 402938875.0, "reward": 0.5636160969734192, "reward_std": 0.2563712000846863, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 693 }, { "clip_ratio/high_max": 0.0013574084459833102, "clip_ratio/high_mean": 0.0003852013112464192, "clip_ratio/low_mean": 0.0003038295905071209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006890309043683374, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 570.4553833007812, "completions/mean_terminated_length": 522.5972900390625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.485272674249052, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 403492419.0, "reward": 0.582589328289032, "reward_std": 0.17874544858932495, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 694 }, { "clip_ratio/high_max": 0.0019339576683705673, "clip_ratio/high_mean": 0.0005122741404193221, "clip_ratio/low_mean": 0.0003889548996767189, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009012290420287172, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 621.0926513671875, "completions/mean_terminated_length": 545.8095703125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 6.494604841061534, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 404058454.0, "reward": 0.5636160969734192, "reward_std": 0.21395939588546753, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 695 }, { "clip_ratio/high_max": 0.0017154207966996182, "clip_ratio/high_mean": 0.0005345684314761456, "clip_ratio/low_mean": 0.00036346616070659366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008980346010503126, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 637.8170166015625, "completions/mean_terminated_length": 578.9376220703125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 6.503937007874016, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 404662090.0, "reward": 0.5011160969734192, "reward_std": 0.21782009303569794, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 696 }, { "clip_ratio/high_max": 0.0018197655481344555, "clip_ratio/high_mean": 0.0005400435502451728, "clip_ratio/low_mean": 0.0002911914510832503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008312349955303944, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 646.8951416015625, "completions/mean_terminated_length": 572.1710205078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.5132691746864975, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 405260556.0, "reward": 0.527901828289032, "reward_std": 0.21196311712265015, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 697 }, { "clip_ratio/high_max": 0.0015164133819780545, "clip_ratio/high_mean": 0.0004549493255581183, "clip_ratio/low_mean": 0.00024323473064669088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006981840529078909, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2078.0, "completions/mean_length": 619.0592041015625, "completions/mean_terminated_length": 563.86962890625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 6.522601341498979, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0141, "num_tokens": 405857641.0, "reward": 0.5558035969734192, "reward_std": 0.2067769169807434, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 698 }, { "clip_ratio/high_max": 0.0016490780781168723, "clip_ratio/high_mean": 0.00043405360872839083, "clip_ratio/low_mean": 0.0003074140852277196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007414677061206021, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 571.9620971679688, "completions/mean_terminated_length": 516.0249633789062, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.531933508311461, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 406402503.0, "reward": 0.6127232313156128, "reward_std": 0.1875341385602951, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 699 }, { "clip_ratio/high_max": 0.0014831389435130404, "clip_ratio/high_mean": 0.000444190265625366, "clip_ratio/low_mean": 0.0002342008714322219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006783911376260221, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 555.341552734375, "completions/mean_terminated_length": 519.4159545898438, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 6.541265675123943, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 406943409.0, "reward": 0.5970982313156128, "reward_std": 0.1910046637058258, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 700 }, { "clip_ratio/high_max": 0.0014010896938998485, "clip_ratio/high_mean": 0.00037620840021190816, "clip_ratio/low_mean": 0.0002854122628832556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006616206599119323, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2719.0, "completions/mean_length": 634.6205444335938, "completions/mean_terminated_length": 575.686767578125, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 6.550597841936424, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 407535725.0, "reward": 0.5613839626312256, "reward_std": 0.17295697331428528, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 701 }, { "clip_ratio/high_max": 0.0016476720811624546, "clip_ratio/high_mean": 0.0004877237863638584, "clip_ratio/low_mean": 0.00032663168167346157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008143554750859039, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 609.7288208007812, "completions/mean_terminated_length": 546.342041015625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 6.559930008748906, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 408100810.0, "reward": 0.5625, "reward_std": 0.21564048528671265, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 702 }, { "clip_ratio/high_max": 0.0014412458749575308, "clip_ratio/high_mean": 0.0003959407438287599, "clip_ratio/low_mean": 0.0004448934485026257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008408341946051223, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 674.734375, "completions/mean_terminated_length": 600.6134643554688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 6.569262175561388, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 408703892.0, "reward": 0.5212053656578064, "reward_std": 0.21060903370380402, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982911348342896, "step": 703 }, { "clip_ratio/high_max": 0.0017365259636790142, "clip_ratio/high_mean": 0.0005543074890965727, "clip_ratio/low_mean": 0.00036931589875166537, "clip_ratio/low_min": 1.3748350284004118e-05, "clip_ratio/region_mean": 0.0009236233872798039, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3110.0, "completions/mean_length": 570.349365234375, "completions/mean_terminated_length": 530.5564575195312, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 6.57859434237387, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 409259517.0, "reward": 0.5770089626312256, "reward_std": 0.24539652466773987, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 704 }, { "clip_ratio/high_max": 0.0015137692444113782, "clip_ratio/high_mean": 0.0004934037027624072, "clip_ratio/low_mean": 0.000305142387333035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007985460842974135, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3367.0, "completions/mean_length": 627.0189819335938, "completions/mean_terminated_length": 579.9287719726562, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 6.587926509186351, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 409853334.0, "reward": 0.5613839626312256, "reward_std": 0.22454431653022766, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 705 }, { "clip_ratio/high_max": 0.0016408362607762683, "clip_ratio/high_mean": 0.000524249555382994, "clip_ratio/low_mean": 0.00033646618055627187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008607157360529527, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 629.6920166015625, "completions/mean_terminated_length": 578.6591186523438, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 6.597258675998834, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 410457346.0, "reward": 0.5334821939468384, "reward_std": 0.25070035457611084, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 706 }, { "clip_ratio/high_max": 0.0015540211488769273, "clip_ratio/high_mean": 0.00046608909974565904, "clip_ratio/low_mean": 0.00030339352542796405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007694826294937229, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 591.3092041015625, "completions/mean_terminated_length": 547.748046875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 6.606590842811316, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 411033495.0, "reward": 0.5680803656578064, "reward_std": 0.1964409202337265, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 707 }, { "clip_ratio/high_max": 0.0014835074653092306, "clip_ratio/high_mean": 0.00046489866849697137, "clip_ratio/low_mean": 0.00039885721116661443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008637558812552015, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3547.0, "completions/mean_length": 609.9877319335938, "completions/mean_terminated_length": 530.3983764648438, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 6.615923009623797, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 411588252.0, "reward": 0.5848214626312256, "reward_std": 0.22094251215457916, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 708 }, { "clip_ratio/high_max": 0.0015491494705202058, "clip_ratio/high_mean": 0.0005491037081810646, "clip_ratio/low_mean": 0.00035680034864071786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009059040594365797, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 605.4296875, "completions/mean_terminated_length": 562.0440673828125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 6.625255176436279, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 412174957.0, "reward": 0.5479910969734192, "reward_std": 0.21751701831817627, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796947836875916, "step": 709 }, { "clip_ratio/high_max": 0.00162834150614799, "clip_ratio/high_mean": 0.00046722792126274726, "clip_ratio/low_mean": 0.0003135976434123222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007808255677446141, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 627.3136596679688, "completions/mean_terminated_length": 552.1653442382812, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 6.634587343248761, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 412758998.0, "reward": 0.566964328289032, "reward_std": 0.20718877017498016, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 710 }, { "clip_ratio/high_max": 0.0015301951243600342, "clip_ratio/high_mean": 0.0004615695543179754, "clip_ratio/low_mean": 0.0002979102235940445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007594797734782333, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 615.3370971679688, "completions/mean_terminated_length": 568.0882568359375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.6439195100612425, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 413345044.0, "reward": 0.546875, "reward_std": 0.22237467765808105, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 711 }, { "clip_ratio/high_max": 0.0016038255198509432, "clip_ratio/high_mean": 0.00043382210117215436, "clip_ratio/low_mean": 0.0004151835817083338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008490056723076123, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 612.5982666015625, "completions/mean_terminated_length": 537.131103515625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.653251676873724, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 413912476.0, "reward": 0.4977678656578064, "reward_std": 0.21902543306350708, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 712 }, { "clip_ratio/high_max": 0.0017356780263071414, "clip_ratio/high_mean": 0.0005065845016360981, "clip_ratio/low_mean": 0.00033234570923923457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008389302265641163, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 592.5435791015625, "completions/mean_terminated_length": 544.9852905273438, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 6.662583843686206, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 414485259.0, "reward": 0.574776828289032, "reward_std": 0.20072515308856964, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 713 }, { "clip_ratio/high_max": 0.0015549954614471062, "clip_ratio/high_mean": 0.00040323632765648654, "clip_ratio/low_mean": 0.00026332150775942864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006665578289357654, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 665.232177734375, "completions/mean_terminated_length": 578.8741455078125, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 6.671916010498688, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 415090307.0, "reward": 0.5234375, "reward_std": 0.16871507465839386, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 714 }, { "clip_ratio/high_max": 0.0016007302656362299, "clip_ratio/high_mean": 0.000484343542325405, "clip_ratio/low_mean": 0.0003593466458369221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008436901816821774, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 640.3292846679688, "completions/mean_terminated_length": 565.4629516601562, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 6.681248177311169, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 415676954.0, "reward": 0.5647321939468384, "reward_std": 0.20824116468429565, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 715 }, { "clip_ratio/high_max": 0.0015444225155079039, "clip_ratio/high_mean": 0.00040532365369472245, "clip_ratio/low_mean": 0.0003145333384964033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007198569969659729, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 600.1082763671875, "completions/mean_terminated_length": 556.656494140625, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 6.690580344123651, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 416260347.0, "reward": 0.5078125, "reward_std": 0.1994103640317917, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 716 }, { "clip_ratio/high_max": 0.0017529315591673367, "clip_ratio/high_mean": 0.0005029141623253963, "clip_ratio/low_mean": 0.00026844842068385333, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007713625773249078, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 648.497802734375, "completions/mean_terminated_length": 593.7755126953125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 6.699912510936133, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 416873065.0, "reward": 0.5368303656578064, "reward_std": 0.20204602181911469, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 717 }, { "clip_ratio/high_max": 0.0016268120270979125, "clip_ratio/high_mean": 0.000511392492626328, "clip_ratio/low_mean": 0.0002864757314000599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007978682178872987, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3151.0, "completions/mean_length": 599.5614013671875, "completions/mean_terminated_length": 564.0845336914062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 6.7092446777486145, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 417457728.0, "reward": 0.582589328289032, "reward_std": 0.24017822742462158, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 718 }, { "clip_ratio/high_max": 0.001902307061754982, "clip_ratio/high_mean": 0.0006159242644798724, "clip_ratio/low_mean": 0.0003254028285937238, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009413270954610198, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 592.3136596679688, "completions/mean_terminated_length": 524.5517578125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 6.718576844561096, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 418017689.0, "reward": 0.5580357313156128, "reward_std": 0.23372025787830353, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 719 }, { "clip_ratio/high_max": 0.0015699679061071947, "clip_ratio/high_mean": 0.0004537762915788335, "clip_ratio/low_mean": 0.0003439263783775459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007977026662047138, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 652.4866333007812, "completions/mean_terminated_length": 565.8077392578125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.727909011373578, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 418601125.0, "reward": 0.527901828289032, "reward_std": 0.2062469869852066, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 720 }, { "clip_ratio/high_max": 0.0012845893561461708, "clip_ratio/high_mean": 0.0003507326888438911, "clip_ratio/low_mean": 0.0002174667658891849, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000568199451208784, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3118.0, "completions/mean_length": 615.109375, "completions/mean_terminated_length": 563.86181640625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.73724117818606, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 419179935.0, "reward": 0.5613839626312256, "reward_std": 0.1699492633342743, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 721 }, { "clip_ratio/high_max": 0.0013956112788946484, "clip_ratio/high_mean": 0.00044027766944054747, "clip_ratio/low_mean": 0.00026302494245555863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007033026072349458, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 577.5714721679688, "completions/mean_terminated_length": 525.771240234375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 6.746573344998541, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 419746967.0, "reward": 0.5491071939468384, "reward_std": 0.19170865416526794, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 722 }, { "clip_ratio/high_max": 0.0014392837410923676, "clip_ratio/high_mean": 0.0004395788671445189, "clip_ratio/low_mean": 0.0002816496747755082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007212285481728031, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3204.0, "completions/mean_length": 580.255615234375, "completions/mean_terminated_length": 532.5305786132812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.755905511811024, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 420305660.0, "reward": 0.5546875, "reward_std": 0.1844968944787979, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 723 }, { "clip_ratio/high_max": 0.002062841680526617, "clip_ratio/high_mean": 0.0006532569404953392, "clip_ratio/low_mean": 0.00031706071627013444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009703176574475947, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1602.0, "completions/mean_length": 554.2623291015625, "completions/mean_terminated_length": 530.3853759765625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 6.765237678623506, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 420858735.0, "reward": 0.6104910969734192, "reward_std": 0.23671838641166687, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 724 }, { "clip_ratio/high_max": 0.0013924341101301252, "clip_ratio/high_mean": 0.00039767156886227895, "clip_ratio/low_mean": 0.0002900869521909044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006877585174152046, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3799.0, "completions/mean_length": 629.2210083007812, "completions/mean_terminated_length": 578.1812133789062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 6.7745698454359875, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0155, "num_tokens": 421454917.0, "reward": 0.5323660969734192, "reward_std": 0.19561494886875153, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 725 }, { "clip_ratio/high_max": 0.001639011170482263, "clip_ratio/high_mean": 0.0005354585969143955, "clip_ratio/low_mean": 0.00034351671774857095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008789753064775141, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 628.3783569335938, "completions/mean_terminated_length": 581.3065795898438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 6.783902012248469, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 422056240.0, "reward": 0.5736607313156128, "reward_std": 0.24310559034347534, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 726 }, { "clip_ratio/high_max": 0.0014604899488404044, "clip_ratio/high_mean": 0.0004352309608748328, "clip_ratio/low_mean": 0.00034727866113826167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007825096436135937, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 620.208740234375, "completions/mean_terminated_length": 565.0374145507812, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 6.793234179060951, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 422645003.0, "reward": 0.504464328289032, "reward_std": 0.23085565865039825, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 727 }, { "clip_ratio/high_max": 0.0016713964269001735, "clip_ratio/high_mean": 0.0005271735949463618, "clip_ratio/low_mean": 0.00038745741767343134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009146310057985829, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 653.46875, "completions/mean_terminated_length": 582.8929443359375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 6.802566345873433, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 423240791.0, "reward": 0.4977678656578064, "reward_std": 0.22710509598255157, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 728 }, { "clip_ratio/high_max": 0.0019422290079091908, "clip_ratio/high_mean": 0.0005786306226127635, "clip_ratio/low_mean": 0.0003068054797950026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008854360980876663, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 651.4855346679688, "completions/mean_terminated_length": 580.8690185546875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.811898512685914, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 423832170.0, "reward": 0.5535714626312256, "reward_std": 0.2062576562166214, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 729 }, { "clip_ratio/high_max": 0.0014903579613019247, "clip_ratio/high_mean": 0.000479897981222166, "clip_ratio/low_mean": 0.000424975715304754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009048736919794464, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 605.8236694335938, "completions/mean_terminated_length": 554.4393920898438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 6.821230679498396, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0108, "num_tokens": 424398276.0, "reward": 0.5011160969734192, "reward_std": 0.22436249256134033, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 730 }, { "clip_ratio/high_max": 0.001639176145545207, "clip_ratio/high_mean": 0.0004777425956490333, "clip_ratio/low_mean": 0.00033155464973333437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008092972320810077, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 620.1629638671875, "completions/mean_terminated_length": 572.9796752929688, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 6.830562846310878, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 424993590.0, "reward": 0.543526828289032, "reward_std": 0.19343462586402893, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 731 }, { "clip_ratio/high_max": 0.0014302250756372814, "clip_ratio/high_mean": 0.0003572983173398825, "clip_ratio/low_mean": 0.000372949505390352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007302478170458926, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3233.0, "completions/mean_length": 681.872802734375, "completions/mean_terminated_length": 595.93359375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 6.83989501312336, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 425605020.0, "reward": 0.5345982313156128, "reward_std": 0.2198163866996765, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 732 }, { "clip_ratio/high_max": 0.0015339939745899756, "clip_ratio/high_mean": 0.00047710767648823094, "clip_ratio/low_mean": 0.00034324261071105866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008203502979995392, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 598.1473388671875, "completions/mean_terminated_length": 562.6561279296875, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 6.849227179935841, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 426205184.0, "reward": 0.5424107313156128, "reward_std": 0.2335616648197174, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 733 }, { "clip_ratio/high_max": 0.0014596762493965798, "clip_ratio/high_mean": 0.00039061952406882483, "clip_ratio/low_mean": 0.0002955498315486693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006861693489099707, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 620.6942138671875, "completions/mean_terminated_length": 553.481201171875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 6.858559346748323, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 426780406.0, "reward": 0.5546875, "reward_std": 0.17281359434127808, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 734 }, { "clip_ratio/high_max": 0.0016730620045564137, "clip_ratio/high_mean": 0.00047316162499555503, "clip_ratio/low_mean": 0.0003248614663107219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007980230843713798, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 626.6808471679688, "completions/mean_terminated_length": 571.6122436523438, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 6.867891513560805, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0181, "num_tokens": 427384352.0, "reward": 0.5546875, "reward_std": 0.20756922662258148, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 735 }, { "clip_ratio/high_max": 0.0015789661929375143, "clip_ratio/high_mean": 0.0004630958856068901, "clip_ratio/low_mean": 0.0003535834071044519, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000816679294075584, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 590.3381958007812, "completions/mean_terminated_length": 554.7677001953125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 6.8772236803732865, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0135, "num_tokens": 427976567.0, "reward": 0.5870535969734192, "reward_std": 0.20496748387813568, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 736 }, { "clip_ratio/high_max": 0.0017882666834339034, "clip_ratio/high_mean": 0.0005872034359981626, "clip_ratio/low_mean": 0.0003146516877450267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009018551154440502, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3476.0, "completions/mean_length": 584.255615234375, "completions/mean_terminated_length": 536.5848388671875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 6.886555847185768, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 428549924.0, "reward": 0.5658482313156128, "reward_std": 0.2166614681482315, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 737 }, { "clip_ratio/high_max": 0.0013999576303831418, "clip_ratio/high_mean": 0.0004432034563706111, "clip_ratio/low_mean": 0.0003115712222552247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007547746636191732, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 664.646240234375, "completions/mean_terminated_length": 594.299560546875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.89588801399825, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 429165791.0, "reward": 0.5189732313156128, "reward_std": 0.20644131302833557, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 738 }, { "clip_ratio/high_max": 0.001364655442557705, "clip_ratio/high_mean": 0.00037603266787300527, "clip_ratio/low_mean": 0.0002650274038842326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006410600663002697, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3938.0, "completions/mean_length": 599.0748291015625, "completions/mean_terminated_length": 567.5709838867188, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 6.905220180810732, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 429750978.0, "reward": 0.5725446939468384, "reward_std": 0.16927777230739594, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 739 }, { "clip_ratio/high_max": 0.0013580989325419068, "clip_ratio/high_mean": 0.0003983628373589454, "clip_ratio/low_mean": 0.00028498169774593407, "clip_ratio/low_min": 6.322071840259014e-06, "clip_ratio/region_mean": 0.0006833445477241185, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 643.421875, "completions/mean_terminated_length": 576.6484375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 6.914552347623214, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0061, "num_tokens": 430352252.0, "reward": 0.5334821939468384, "reward_std": 0.18611155450344086, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 740 }, { "clip_ratio/high_max": 0.0017543586300234892, "clip_ratio/high_mean": 0.0005219036647758912, "clip_ratio/low_mean": 0.000338342642862699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008602463058196008, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 546.8951416015625, "completions/mean_terminated_length": 530.9798583984375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.923884514435695, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 430906374.0, "reward": 0.590401828289032, "reward_std": 0.22608637809753418, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 741 }, { "clip_ratio/high_max": 0.0015941227948133019, "clip_ratio/high_mean": 0.0004207086522001191, "clip_ratio/low_mean": 0.0003866800755076838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008073887197497243, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 597.2689819335938, "completions/mean_terminated_length": 541.7335815429688, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 6.933216681248178, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 431468687.0, "reward": 0.5613839626312256, "reward_std": 0.1962568461894989, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 742 }, { "clip_ratio/high_max": 0.0014094443440626492, "clip_ratio/high_mean": 0.00040140482110473386, "clip_ratio/low_mean": 0.0002468744604584572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006482792828137462, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2361.0, "completions/mean_length": 540.9788208007812, "completions/mean_terminated_length": 525.0370483398438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 6.942548848060659, "grad_norm": 0.10986328125, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 432031988.0, "reward": 0.5758928656578064, "reward_std": 0.16833347082138062, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 743 }, { "clip_ratio/high_max": 0.0016836990744195646, "clip_ratio/high_mean": 0.000504533958974207, "clip_ratio/low_mean": 0.0003987863572092465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009033203023136593, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 659.8370971679688, "completions/mean_terminated_length": 581.3858032226562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.951881014873141, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 432627586.0, "reward": 0.4988839626312256, "reward_std": 0.2198163866996765, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 744 }, { "clip_ratio/high_max": 0.0018406853705528192, "clip_ratio/high_mean": 0.0005424522314569913, "clip_ratio/low_mean": 0.00038954928982093406, "clip_ratio/low_min": 7.519249265897088e-06, "clip_ratio/region_mean": 0.0009320015251432778, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 635.1417846679688, "completions/mean_terminated_length": 576.216796875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.961213181685623, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 433225793.0, "reward": 0.5412946939468384, "reward_std": 0.21830865740776062, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 745 }, { "clip_ratio/high_max": 0.0019208669509680476, "clip_ratio/high_mean": 0.000601478594035143, "clip_ratio/low_mean": 0.0003656832822116485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009671619141045085, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 602.5234375, "completions/mean_terminated_length": 551.090576171875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.970545348498105, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 433803326.0, "reward": 0.543526828289032, "reward_std": 0.23976704478263855, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 746 }, { "clip_ratio/high_max": 0.0015970139256751281, "clip_ratio/high_mean": 0.00045741989526959514, "clip_ratio/low_mean": 0.0003967752943481173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008541951947336202, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2383.0, "completions/mean_length": 594.7545166015625, "completions/mean_terminated_length": 547.2262573242188, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 6.979877515310586, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 434373586.0, "reward": 0.5725446939468384, "reward_std": 0.20132221281528473, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 747 }, { "clip_ratio/high_max": 0.0018936878550448455, "clip_ratio/high_mean": 0.0005422328524673503, "clip_ratio/low_mean": 0.0002730562445663054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008152890968631255, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 613.2913208007812, "completions/mean_terminated_length": 537.8392333984375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 6.989209682123068, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 434925647.0, "reward": 0.5959821939468384, "reward_std": 0.19535604119300842, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 748 }, { "clip_ratio/high_max": 0.0013596016688097734, "clip_ratio/high_mean": 0.00037905835620222206, "clip_ratio/low_mean": 0.00038117095652978605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000760229314892058, "completions/clipped_ratio": 0.005681818181818232, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 575.4716186523438, "completions/mean_terminated_length": 555.3543090820312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 6.99854184893555, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 435517554.0, "reward": 0.5111607313156128, "reward_std": 0.19948776066303253, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 749 }, { "clip_ratio/high_max": 0.0014337654092742014, "clip_ratio/high_mean": 0.00040704050911699596, "clip_ratio/low_mean": 0.0005006092478652135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009076497672140249, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2166.0, "completions/mean_length": 578.4486694335938, "completions/mean_terminated_length": 550.7514038085938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.009332166812482, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 436103060.0, "reward": 0.5412946939468384, "reward_std": 0.2098594754934311, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 750 }, { "clip_ratio/high_max": 0.0016529791428183671, "clip_ratio/high_mean": 0.0004881096972439991, "clip_ratio/low_mean": 0.0002503577248944566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007384674263448687, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 584.1328125, "completions/mean_terminated_length": 540.4824829101562, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 7.0186643336249634, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 436672571.0, "reward": 0.590401828289032, "reward_std": 0.20238234102725983, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 751 }, { "clip_ratio/high_max": 0.0016284942066704389, "clip_ratio/high_mean": 0.00045171342856065166, "clip_ratio/low_mean": 0.00042511201991146663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008768254574533785, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 607.5379638671875, "completions/mean_terminated_length": 556.1788940429688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 7.027996500437445, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 437247989.0, "reward": 0.5546875, "reward_std": 0.22462210059165955, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 752 }, { "clip_ratio/high_max": 0.0018130133648810443, "clip_ratio/high_mean": 0.0005223414323154429, "clip_ratio/low_mean": 0.0003100730389178352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008324144719153992, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 620.0045166015625, "completions/mean_terminated_length": 588.689208984375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.037328667249927, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 437856401.0, "reward": 0.5502232313156128, "reward_std": 0.21970760822296143, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 753 }, { "clip_ratio/high_max": 0.0014793316986470018, "clip_ratio/high_mean": 0.00046869984316799673, "clip_ratio/low_mean": 0.0003304205778249525, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007991204211066361, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2394.0, "completions/mean_length": 624.075927734375, "completions/mean_terminated_length": 540.7496948242188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.046660834062409, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 438416461.0, "reward": 0.5691964626312256, "reward_std": 0.19422808289527893, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 754 }, { "clip_ratio/high_max": 0.0014441558842008817, "clip_ratio/high_mean": 0.0004057947363662606, "clip_ratio/low_mean": 0.00032311120219219447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007289059467439074, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2099.0, "completions/mean_length": 617.875, "completions/mean_terminated_length": 570.66064453125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 7.05599300087489, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 439021605.0, "reward": 0.546875, "reward_std": 0.2086530178785324, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 755 }, { "clip_ratio/high_max": 0.001188702220133564, "clip_ratio/high_mean": 0.0003310770450752898, "clip_ratio/low_mean": 0.00032470773794557317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006557847978001519, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 646.9676513671875, "completions/mean_terminated_length": 584.2579345703125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.065325167687372, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 439624824.0, "reward": 0.5625, "reward_std": 0.19361938536167145, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 756 }, { "clip_ratio/high_max": 0.0017265345031773904, "clip_ratio/high_mean": 0.0005095569831610192, "clip_ratio/low_mean": 0.0002730259932377521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007825829634384718, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 564.208740234375, "completions/mean_terminated_length": 532.3908081054688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 7.074657334499854, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 440184827.0, "reward": 0.5915178656578064, "reward_std": 0.18960639834403992, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 757 }, { "clip_ratio/high_max": 0.001866878319560783, "clip_ratio/high_mean": 0.0005475571178976679, "clip_ratio/low_mean": 0.0004235813817103917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009711385109767434, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 613.9129638671875, "completions/mean_terminated_length": 550.6022338867188, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 7.083989501312336, "grad_norm": 0.1484375, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 440757349.0, "reward": 0.5334821939468384, "reward_std": 0.2717750668525696, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 758 }, { "clip_ratio/high_max": 0.0018774439668050036, "clip_ratio/high_mean": 0.0006037406417362945, "clip_ratio/low_mean": 0.00038811334184174484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009918539881255128, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 592.1763916015625, "completions/mean_terminated_length": 532.5198974609375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 7.093321668124818, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 441318699.0, "reward": 0.5323660969734192, "reward_std": 0.22300560772418976, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 759 }, { "clip_ratio/high_max": 0.0019466903140710201, "clip_ratio/high_mean": 0.0005515515213119215, "clip_ratio/low_mean": 0.0003898823424606235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009414338574060821, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 599.1027221679688, "completions/mean_terminated_length": 571.5680541992188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.1026538349373, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 441918743.0, "reward": 0.5491071939468384, "reward_std": 0.20474882423877716, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 760 }, { "clip_ratio/high_max": 0.0017717557166179176, "clip_ratio/high_mean": 0.000545461072647413, "clip_ratio/low_mean": 0.000388486207157257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000933947267185431, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 605.9765625, "completions/mean_terminated_length": 542.5215454101562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 7.111986001749782, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0163, "num_tokens": 442478890.0, "reward": 0.5636160969734192, "reward_std": 0.23124006390571594, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 761 }, { "clip_ratio/high_max": 0.0013604059067802154, "clip_ratio/high_mean": 0.0004352437889565408, "clip_ratio/low_mean": 0.0003454785612575506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000780722353283636, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 667.7265625, "completions/mean_terminated_length": 601.4232177734375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.121318168562263, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 443097885.0, "reward": 0.5345982313156128, "reward_std": 0.19700251519680023, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 762 }, { "clip_ratio/high_max": 0.0014831846856395714, "clip_ratio/high_mean": 0.0003804415205195255, "clip_ratio/low_mean": 0.0004031428738926479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007835843871362158, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3097.0, "completions/mean_length": 609.3080444335938, "completions/mean_terminated_length": 545.9136352539062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.130650335374745, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 443673041.0, "reward": 0.504464328289032, "reward_std": 0.22262795269489288, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 763 }, { "clip_ratio/high_max": 0.001707993393210927, "clip_ratio/high_mean": 0.00048011166109063197, "clip_ratio/low_mean": 0.00033218250530353544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008122941662804806, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3149.0, "completions/mean_length": 603.8671875, "completions/mean_terminated_length": 560.462158203125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.139982502187227, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 444249250.0, "reward": 0.578125, "reward_std": 0.1947498470544815, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 764 }, { "clip_ratio/high_max": 0.0014983212004153756, "clip_ratio/high_mean": 0.00043512692445801804, "clip_ratio/low_mean": 0.000351197007034898, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000786323927968624, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 653.546875, "completions/mean_terminated_length": 582.9727172851562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.1493146689997085, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 444860572.0, "reward": 0.527901828289032, "reward_std": 0.2144797444343567, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 765 }, { "clip_ratio/high_max": 0.0017015719322444056, "clip_ratio/high_mean": 0.0005034475834690966, "clip_ratio/low_mean": 0.00027002869319403544, "clip_ratio/low_min": 1.3130252227711026e-05, "clip_ratio/region_mean": 0.0007734762771178794, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 632.859375, "completions/mean_terminated_length": 545.6864624023438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 7.15864683581219, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 445420454.0, "reward": 0.574776828289032, "reward_std": 0.20835062861442566, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 766 }, { "clip_ratio/high_max": 0.0014358019634528318, "clip_ratio/high_mean": 0.00041219516538149037, "clip_ratio/low_mean": 0.00035338151928954176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007655766912648687, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3487.0, "completions/mean_length": 632.9006958007812, "completions/mean_terminated_length": 581.9150390625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.167979002624672, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0155, "num_tokens": 446027669.0, "reward": 0.4921875298023224, "reward_std": 0.21943660080432892, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 767 }, { "clip_ratio/high_max": 0.0017051438917405903, "clip_ratio/high_mean": 0.0005269981031688076, "clip_ratio/low_mean": 0.0003154945430878797, "clip_ratio/low_min": 1.4070238648855593e-05, "clip_ratio/region_mean": 0.000842492652736837, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2211.0, "completions/mean_length": 593.2332763671875, "completions/mean_terminated_length": 549.696044921875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 7.177311169437154, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 446600230.0, "reward": 0.5267857313156128, "reward_std": 0.2086516171693802, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 768 }, { "clip_ratio/high_max": 0.0013604813557321904, "clip_ratio/high_mean": 0.00041439705387347203, "clip_ratio/low_mean": 0.0003031894350442599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007175864857345005, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 597.5982666015625, "completions/mean_terminated_length": 550.108642578125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 7.186643336249635, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 447174566.0, "reward": 0.5736607313156128, "reward_std": 0.21301396191120148, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 769 }, { "clip_ratio/high_max": 0.0016332325531038805, "clip_ratio/high_mean": 0.000469606401566125, "clip_ratio/low_mean": 0.00035437491192169546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008239813132604468, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3869.0, "completions/mean_length": 621.4207763671875, "completions/mean_terminated_length": 562.26220703125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.195975503062117, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 447762439.0, "reward": 0.5714285969734192, "reward_std": 0.1998663991689682, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 770 }, { "clip_ratio/high_max": 0.0015349312843682128, "clip_ratio/high_mean": 0.0004354138745839009, "clip_ratio/low_mean": 0.00032673705868546676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007621509257660364, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 625.669677734375, "completions/mean_terminated_length": 586.5011596679688, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 7.205307669874599, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 448367511.0, "reward": 0.5111607313156128, "reward_std": 0.20842549204826355, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 771 }, { "clip_ratio/high_max": 0.0017207709597641951, "clip_ratio/high_mean": 0.0005706156265432583, "clip_ratio/low_mean": 0.00045667639528801374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010272920326315216, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3449.0, "completions/mean_length": 612.0145263671875, "completions/mean_terminated_length": 576.6640014648438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 7.2146398366870805, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 448972404.0, "reward": 0.5837053656578064, "reward_std": 0.21751701831817627, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 772 }, { "clip_ratio/high_max": 0.0015187498365776264, "clip_ratio/high_mean": 0.00037981771674822085, "clip_ratio/low_mean": 0.0003381844456953331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000718002160283504, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 558.6529541015625, "completions/mean_terminated_length": 510.6346435546875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 7.223972003499562, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 449518685.0, "reward": 0.5323660969734192, "reward_std": 0.1911466419696808, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 773 }, { "clip_ratio/high_max": 0.0015405614449264249, "clip_ratio/high_mean": 0.00047592004079888284, "clip_ratio/low_mean": 0.00040787546913634287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008837955238050199, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3259.0, "completions/mean_length": 583.0491333007812, "completions/mean_terminated_length": 539.3853149414062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 7.233304170312044, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 450087857.0, "reward": 0.5256696939468384, "reward_std": 0.2395801991224289, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 774 }, { "clip_ratio/high_max": 0.001768720663676504, "clip_ratio/high_mean": 0.0005720045446651056, "clip_ratio/low_mean": 0.00032422032188605954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000896224875759799, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3860.0, "completions/mean_length": 672.036865234375, "completions/mean_terminated_length": 613.7401123046875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 7.242636337124526, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0145, "num_tokens": 450719850.0, "reward": 0.5758928656578064, "reward_std": 0.22199852764606476, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448275566101074, "step": 775 }, { "clip_ratio/high_max": 0.0014075522412895225, "clip_ratio/high_mean": 0.0003883136796503095, "clip_ratio/low_mean": 0.00035391475341839396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007422284356835007, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 656.575927734375, "completions/mean_terminated_length": 586.0637817382812, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 7.251968503937007, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 451320398.0, "reward": 0.566964328289032, "reward_std": 0.21271198987960815, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 776 }, { "clip_ratio/high_max": 0.0017032995419867802, "clip_ratio/high_mean": 0.0004951114142386359, "clip_ratio/low_mean": 0.00035783809050826676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008529495089533157, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 572.6171875, "completions/mean_terminated_length": 524.7885131835938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 7.26130067074949, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 451866575.0, "reward": 0.6026785969734192, "reward_std": 0.20388300716876984, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 777 }, { "clip_ratio/high_max": 0.0016962697645794833, "clip_ratio/high_mean": 0.0005330779235919181, "clip_ratio/low_mean": 0.00035467849852466315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008877564205249655, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1960.0, "completions/mean_length": 625.5814819335938, "completions/mean_terminated_length": 558.4630126953125, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 7.270632837561972, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 452442936.0, "reward": 0.5580357313156128, "reward_std": 0.23334047198295593, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 778 }, { "clip_ratio/high_max": 0.0016587730769970221, "clip_ratio/high_mean": 0.0004879003981841379, "clip_ratio/low_mean": 0.00035009510349937045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008379955088457791, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 680.1529541015625, "completions/mean_terminated_length": 606.1493530273438, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 7.2799650043744535, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 453067065.0, "reward": 0.4888392984867096, "reward_std": 0.2131224423646927, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 779 }, { "clip_ratio/high_max": 0.0017931463626155164, "clip_ratio/high_mean": 0.0005471243384818081, "clip_ratio/low_mean": 0.00044033032418155926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009874546667560935, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1901.0, "completions/mean_length": 599.9408569335938, "completions/mean_terminated_length": 528.2677001953125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 7.289297171186935, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 453619108.0, "reward": 0.535714328289032, "reward_std": 0.2287980318069458, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 780 }, { "clip_ratio/high_max": 0.0018340009501116583, "clip_ratio/high_mean": 0.0005197865559694037, "clip_ratio/low_mean": 0.00035022596580347454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00087001253177732, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 591.4542846679688, "completions/mean_terminated_length": 543.8812255859375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 7.298629337999417, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 454182203.0, "reward": 0.598214328289032, "reward_std": 0.21177834272384644, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 781 }, { "clip_ratio/high_max": 0.0014043086439414765, "clip_ratio/high_mean": 0.000403375347104884, "clip_ratio/low_mean": 0.0004151679099777539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008185432534446591, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 634.075927734375, "completions/mean_terminated_length": 583.1075439453125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 7.307961504811899, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 454790495.0, "reward": 0.4776785969734192, "reward_std": 0.23401561379432678, "rewards/verify_math_reward/mean": 0.4776785671710968, "rewards/verify_math_reward/std": 0.4997805058956146, "step": 782 }, { "clip_ratio/high_max": 0.0015813286304364738, "clip_ratio/high_mean": 0.00044801169804031815, "clip_ratio/low_mean": 0.0002970719417589862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007450836478710698, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 622.0848388671875, "completions/mean_terminated_length": 570.93994140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 7.31729367162438, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.016, "num_tokens": 455379979.0, "reward": 0.5580357313156128, "reward_std": 0.19238406419754028, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 783 }, { "clip_ratio/high_max": 0.001472742427722551, "clip_ratio/high_mean": 0.00048223072303699155, "clip_ratio/low_mean": 0.0003173981750705934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007996289045877347, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 605.130615234375, "completions/mean_terminated_length": 533.5637817382812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 7.326625838436862, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 455938392.0, "reward": 0.5926339626312256, "reward_std": 0.19986683130264282, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 784 }, { "clip_ratio/high_max": 0.0015499775236094138, "clip_ratio/high_mean": 0.00046867967193975346, "clip_ratio/low_mean": 0.00030276298502940335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007714426546954201, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2070.0, "completions/mean_length": 566.927490234375, "completions/mean_terminated_length": 543.135986328125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 7.335958005249344, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 456503423.0, "reward": 0.582589328289032, "reward_std": 0.19365398585796356, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 785 }, { "clip_ratio/high_max": 0.001369633419017191, "clip_ratio/high_mean": 0.0004456426138403913, "clip_ratio/low_mean": 0.00035350752568774624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007991501388460165, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 588.0223388671875, "completions/mean_terminated_length": 556.4189453125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 7.3452901720618256, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 457087507.0, "reward": 0.5011160969734192, "reward_std": 0.19219790399074554, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 786 }, { "clip_ratio/high_max": 0.0013568450876846327, "clip_ratio/high_mean": 0.0004112453474363065, "clip_ratio/low_mean": 0.00037408362936730555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007853289989725454, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3739.0, "completions/mean_length": 626.2444458007812, "completions/mean_terminated_length": 575.1608276367188, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 7.354622338874307, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0184, "num_tokens": 457675630.0, "reward": 0.5491071939468384, "reward_std": 0.20520275831222534, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 787 }, { "clip_ratio/high_max": 0.0014830811724095838, "clip_ratio/high_mean": 0.00043378133682381304, "clip_ratio/low_mean": 0.0003535696736207683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007873510039644316, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 621.7857666015625, "completions/mean_terminated_length": 586.5343627929688, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 7.363954505686789, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 458291454.0, "reward": 0.5647321939468384, "reward_std": 0.18915246427059174, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 788 }, { "clip_ratio/high_max": 0.0015857654088904383, "clip_ratio/high_mean": 0.00044632711296799243, "clip_ratio/low_mean": 0.0003439245534764268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007902516708782059, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 634.1730346679688, "completions/mean_terminated_length": 571.2306518554688, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "epoch": 7.373286672499271, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.01, "num_tokens": 458879977.0, "reward": 0.5948660969734192, "reward_std": 0.20166738331317902, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 789 }, { "clip_ratio/high_max": 0.00176456842564221, "clip_ratio/high_mean": 0.0005661145551130176, "clip_ratio/low_mean": 0.00035024219062052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009163567383438931, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 566.5301513671875, "completions/mean_terminated_length": 518.6187744140625, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 7.3826188393117524, "grad_norm": 0.1513671875, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 459428436.0, "reward": 0.5714285969734192, "reward_std": 0.2574998438358307, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 790 }, { "clip_ratio/high_max": 0.0016188510389838484, "clip_ratio/high_mean": 0.00048283214073308045, "clip_ratio/low_mean": 0.000309131930634976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007919640747786616, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 582.4330444335938, "completions/mean_terminated_length": 538.7615966796875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 7.391951006124234, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0229, "num_tokens": 459995816.0, "reward": 0.5959821939468384, "reward_std": 0.19685347378253937, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 791 }, { "clip_ratio/high_max": 0.0018546927003626479, "clip_ratio/high_mean": 0.0005647272500937106, "clip_ratio/low_mean": 0.00031266814812624943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008773953923082445, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2424.0, "completions/mean_length": 590.0123291015625, "completions/mean_terminated_length": 514.0558471679688, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 7.401283172936716, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 460544315.0, "reward": 0.5546875, "reward_std": 0.21533779799938202, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 792 }, { "clip_ratio/high_max": 0.0016667567588228849, "clip_ratio/high_mean": 0.00046237490892053756, "clip_ratio/low_mean": 0.00022091619075581548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000683291098312111, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 568.8092041015625, "completions/mean_terminated_length": 524.9683837890625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.410615339749198, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 461097960.0, "reward": 0.6160714626312256, "reward_std": 0.17577485740184784, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 793 }, { "clip_ratio/high_max": 0.0015939234272082103, "clip_ratio/high_mean": 0.00048763796485218336, "clip_ratio/low_mean": 0.00032636951834774663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008140074769471539, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 596.1830444335938, "completions/mean_terminated_length": 564.6531372070312, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 7.41994750656168, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 461685220.0, "reward": 0.551339328289032, "reward_std": 0.22067400813102722, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 794 }, { "clip_ratio/high_max": 0.00141688451913069, "clip_ratio/high_mean": 0.00044076187373320863, "clip_ratio/low_mean": 0.0002488006248313468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006895625019751606, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 570.896240234375, "completions/mean_terminated_length": 527.0813598632812, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 7.429279673374162, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 462231959.0, "reward": 0.6194196939468384, "reward_std": 0.18536268174648285, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 795 }, { "clip_ratio/high_max": 0.00145923654690705, "clip_ratio/high_mean": 0.00043217743564127886, "clip_ratio/low_mean": 0.0002877214802765593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007198989078460727, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 676.1473388671875, "completions/mean_terminated_length": 637.5485229492188, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 7.438611840186644, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 462896443.0, "reward": 0.4687500298023224, "reward_std": 0.22924308478832245, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 796 }, { "clip_ratio/high_max": 0.001531427750705916, "clip_ratio/high_mean": 0.0004305337644154861, "clip_ratio/low_mean": 0.00045910697940598766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008896407489373814, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 614.8995971679688, "completions/mean_terminated_length": 575.6094970703125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.447944006999125, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0103, "num_tokens": 463493369.0, "reward": 0.4877232313156128, "reward_std": 0.23649832606315613, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 797 }, { "clip_ratio/high_max": 0.0018859916326618986, "clip_ratio/high_mean": 0.0006090074216444918, "clip_ratio/low_mean": 0.0004206171322493901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010296245682184235, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 614.5636596679688, "completions/mean_terminated_length": 579.2389526367188, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.457276173811607, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0119, "num_tokens": 464103322.0, "reward": 0.5948660969734192, "reward_std": 0.22413356602191925, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 798 }, { "clip_ratio/high_max": 0.0016947435833571944, "clip_ratio/high_mean": 0.000559796584070682, "clip_ratio/low_mean": 0.0004089853653113096, "clip_ratio/low_min": 1.6587049685767852e-05, "clip_ratio/region_mean": 0.0009687819501777994, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3366.0, "completions/mean_length": 662.6864013671875, "completions/mean_terminated_length": 572.2325439453125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.466608340624089, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 464694537.0, "reward": 0.5803571939468384, "reward_std": 0.24243409931659698, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 799 }, { "clip_ratio/high_max": 0.0014676327882625628, "clip_ratio/high_mean": 0.000505304971966325, "clip_ratio/low_mean": 0.0003050643726965063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008103693355678843, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 621.1998291015625, "completions/mean_terminated_length": 581.9808349609375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.475940507436571, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 465301404.0, "reward": 0.590401828289032, "reward_std": 0.22229206562042236, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 800 }, { "clip_ratio/high_max": 0.0016326555942214327, "clip_ratio/high_mean": 0.0005000448104510724, "clip_ratio/low_mean": 0.00037784027892939775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00087788510700193, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 647.2846069335938, "completions/mean_terminated_length": 580.5858764648438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 7.485272674249052, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0064, "num_tokens": 465894539.0, "reward": 0.5167410969734192, "reward_std": 0.22300560772418976, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 801 }, { "clip_ratio/high_max": 0.0018936303849841352, "clip_ratio/high_mean": 0.000623231191184459, "clip_ratio/low_mean": 0.0004154656953687663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001038696858813637, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 632.6049194335938, "completions/mean_terminated_length": 569.6340942382812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.494604841061534, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 466480697.0, "reward": 0.5345982313156128, "reward_std": 0.2536267638206482, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 802 }, { "clip_ratio/high_max": 0.001562667417601915, "clip_ratio/high_mean": 0.000518414274438328, "clip_ratio/low_mean": 0.0003117373681789104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000830151646368904, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 645.4252319335938, "completions/mean_terminated_length": 578.6905517578125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.503937007874016, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 467084038.0, "reward": 0.5535714626312256, "reward_std": 0.22064054012298584, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 803 }, { "clip_ratio/high_max": 0.0016991870797937736, "clip_ratio/high_mean": 0.0005164495009921666, "clip_ratio/low_mean": 0.0003523468687944842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008687963800184662, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1982.0, "completions/mean_length": 600.036865234375, "completions/mean_terminated_length": 552.580322265625, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 7.5132691746864975, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 467664359.0, "reward": 0.5535714626312256, "reward_std": 0.24723871052265167, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 804 }, { "clip_ratio/high_max": 0.0017386214040016057, "clip_ratio/high_mean": 0.0005429102311609313, "clip_ratio/low_mean": 0.00034907324879895896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008919834854168585, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2433.0, "completions/mean_length": 602.3359375, "completions/mean_terminated_length": 542.8524780273438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 7.522601341498979, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 468227764.0, "reward": 0.5457589626312256, "reward_std": 0.23154176771640778, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 805 }, { "clip_ratio/high_max": 0.0014294162710939418, "clip_ratio/high_mean": 0.0004229120048648838, "clip_ratio/low_mean": 0.00026226252475680667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006851745201856829, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 631.4609375, "completions/mean_terminated_length": 572.4733276367188, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 7.531933508311461, "grad_norm": 0.1083984375, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 468811401.0, "reward": 0.5680803656578064, "reward_std": 0.16905026137828827, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 806 }, { "clip_ratio/high_max": 0.001720678735182446, "clip_ratio/high_mean": 0.00045973018529821275, "clip_ratio/low_mean": 0.00036290575133079983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008226359382206283, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3888.0, "completions/mean_length": 632.536865234375, "completions/mean_terminated_length": 581.5458374023438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.541265675123943, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 469412546.0, "reward": 0.5178571939468384, "reward_std": 0.19261160492897034, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 807 }, { "clip_ratio/high_max": 0.0014549345269188052, "clip_ratio/high_mean": 0.00045358947397744487, "clip_ratio/low_mean": 0.00032638281209074194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007799722761774319, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 633.5045166015625, "completions/mean_terminated_length": 598.3720092773438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 7.550597841936424, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 470022694.0, "reward": 0.4977678656578064, "reward_std": 0.20793946087360382, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 808 }, { "clip_ratio/high_max": 0.001693226299721573, "clip_ratio/high_mean": 0.0005225888533004763, "clip_ratio/low_mean": 0.00032883471340028336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008514235723851016, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 638.3136596679688, "completions/mean_terminated_length": 595.3367309570312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 7.559930008748906, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 470637391.0, "reward": 0.551339328289032, "reward_std": 0.24461443722248077, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 809 }, { "clip_ratio/high_max": 0.0012140335056756157, "clip_ratio/high_mean": 0.00038195894353521, "clip_ratio/low_mean": 0.0002825527674303885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006645117059633776, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 570.375, "completions/mean_terminated_length": 542.6141967773438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.569262175561388, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 471221815.0, "reward": 0.5479910969734192, "reward_std": 0.18388956785202026, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 810 }, { "clip_ratio/high_max": 0.0014798217916904832, "clip_ratio/high_mean": 0.00041883209007664846, "clip_ratio/low_mean": 0.00032986845434379575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007487005432267324, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 648.0011596679688, "completions/mean_terminated_length": 609.0846557617188, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 7.57859434237387, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 471847016.0, "reward": 0.5401785969734192, "reward_std": 0.18878155946731567, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 811 }, { "clip_ratio/high_max": 0.001788241143913183, "clip_ratio/high_mean": 0.0005103839109779074, "clip_ratio/low_mean": 0.000431636027769855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009420199339729152, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 655.0546875, "completions/mean_terminated_length": 604.3952026367188, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.587926509186351, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 472473673.0, "reward": 0.5066964626312256, "reward_std": 0.23912444710731506, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 812 }, { "clip_ratio/high_max": 0.0016826707324071322, "clip_ratio/high_mean": 0.0004826609840620222, "clip_ratio/low_mean": 0.00019433401951118867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006769950123270974, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1799.0, "completions/mean_length": 616.1395263671875, "completions/mean_terminated_length": 540.7491455078125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 7.597258675998834, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 473035366.0, "reward": 0.606026828289032, "reward_std": 0.16668446362018585, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 813 }, { "clip_ratio/high_max": 0.0020013900084450142, "clip_ratio/high_mean": 0.0005548718265799835, "clip_ratio/low_mean": 0.0003268764485255815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008817482830636436, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3075.0, "completions/mean_length": 540.7042846679688, "completions/mean_terminated_length": 496.5141296386719, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 7.606590842811316, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 473563957.0, "reward": 0.640625, "reward_std": 0.21522751450538635, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 814 }, { "clip_ratio/high_max": 0.0015992808030205197, "clip_ratio/high_mean": 0.00044640013334174, "clip_ratio/low_mean": 0.0003525676007711809, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007989677169462084, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 610.911865234375, "completions/mean_terminated_length": 547.5465698242188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 7.615923009623797, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0103, "num_tokens": 474131334.0, "reward": 0.5345982313156128, "reward_std": 0.1994117796421051, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 815 }, { "clip_ratio/high_max": 0.0015622702030668734, "clip_ratio/high_mean": 0.00044291848939792544, "clip_ratio/low_mean": 0.000254774897371135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006976933846090105, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3201.0, "completions/mean_length": 621.5167846679688, "completions/mean_terminated_length": 582.3013916015625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 7.625255176436279, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 474733405.0, "reward": 0.5212053656578064, "reward_std": 0.20099589228630066, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 816 }, { "clip_ratio/high_max": 0.0018664657654881012, "clip_ratio/high_mean": 0.0005914838224043706, "clip_ratio/low_mean": 0.00024880816079075885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008402919856962399, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 609.3984375, "completions/mean_terminated_length": 546.0056762695312, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 7.634587343248761, "grad_norm": 0.10986328125, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 475299522.0, "reward": 0.559151828289032, "reward_std": 0.15770578384399414, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 817 }, { "clip_ratio/high_max": 0.001357779909085366, "clip_ratio/high_mean": 0.0004008281947562864, "clip_ratio/low_mean": 0.0003323059210060819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007331341175813577, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 664.1295166015625, "completions/mean_terminated_length": 577.74365234375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 7.6439195100612425, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 475895886.0, "reward": 0.5446428656578064, "reward_std": 0.19787125289440155, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 818 }, { "clip_ratio/high_max": 0.0013974533867440186, "clip_ratio/high_mean": 0.00043150204078301613, "clip_ratio/low_mean": 0.00034697368744218693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007784757353874738, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3819.0, "completions/mean_length": 657.693115234375, "completions/mean_terminated_length": 599.152099609375, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 7.653251676873724, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 476521307.0, "reward": 0.5412946939468384, "reward_std": 0.2233087122440338, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 819 }, { "clip_ratio/high_max": 0.001806898575523519, "clip_ratio/high_mean": 0.0006096012002672069, "clip_ratio/low_mean": 0.0002712697975084666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008808709849290608, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 577.2109375, "completions/mean_terminated_length": 525.4053955078125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 7.662583843686206, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 477067808.0, "reward": 0.6037946939468384, "reward_std": 0.2060961276292801, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938119411468506, "step": 820 }, { "clip_ratio/high_max": 0.0016304007822327549, "clip_ratio/high_mean": 0.0005361508647183655, "clip_ratio/low_mean": 0.00040439952840642945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009405503815287375, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3089.0, "completions/mean_length": 666.3080444335938, "completions/mean_terminated_length": 588.0045166015625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 7.671916010498688, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 477661988.0, "reward": 0.5323660969734192, "reward_std": 0.2503633499145508, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 821 }, { "clip_ratio/high_max": 0.0014184003975969972, "clip_ratio/high_mean": 0.000398283656068088, "clip_ratio/low_mean": 0.0003752951065507659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007735787530691596, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 643.6886596679688, "completions/mean_terminated_length": 564.8687133789062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.681248177311169, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 478253109.0, "reward": 0.5089285969734192, "reward_std": 0.20601874589920044, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 822 }, { "clip_ratio/high_max": 0.001694536106697342, "clip_ratio/high_mean": 0.00048725585725151177, "clip_ratio/low_mean": 0.0002923149418165849, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007795707961122389, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3446.0, "completions/mean_length": 614.138427734375, "completions/mean_terminated_length": 526.4942626953125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 7.690580344123651, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 478799377.0, "reward": 0.5803571939468384, "reward_std": 0.21804973483085632, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 823 }, { "clip_ratio/high_max": 0.001586648628290277, "clip_ratio/high_mean": 0.00044741310102835996, "clip_ratio/low_mean": 0.00035425917997145007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008016722656520869, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 575.5949096679688, "completions/mean_terminated_length": 551.86181640625, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 7.699912510936133, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 479377478.0, "reward": 0.512276828289032, "reward_std": 0.193587988615036, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 824 }, { "clip_ratio/high_max": 0.0016389518677897286, "clip_ratio/high_mean": 0.0005119308898429153, "clip_ratio/low_mean": 0.0004532880793703953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009652189673943212, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 615.474365234375, "completions/mean_terminated_length": 568.2274169921875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 7.7092446777486145, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 479974839.0, "reward": 0.5792410969734192, "reward_std": 0.23300601541996002, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 825 }, { "clip_ratio/high_max": 0.0013268217981021735, "clip_ratio/high_mean": 0.0003788411020195781, "clip_ratio/low_mean": 0.00033185177494488016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007106928760549636, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 636.4252319335938, "completions/mean_terminated_length": 557.439453125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 7.718576844561096, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 480557268.0, "reward": 0.5613839626312256, "reward_std": 0.18532174825668335, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 826 }, { "clip_ratio/high_max": 0.0016341294431185815, "clip_ratio/high_mean": 0.0005368804891077161, "clip_ratio/low_mean": 0.000411711231663503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009485917289566714, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 583.5335083007812, "completions/mean_terminated_length": 551.8896484375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 7.727909011373578, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 481142946.0, "reward": 0.5602678656578064, "reward_std": 0.2356332242488861, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 827 }, { "clip_ratio/high_max": 0.0020589468367688823, "clip_ratio/high_mean": 0.00061184364494693, "clip_ratio/low_mean": 0.000335012699224535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009468563348491443, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2809.0, "completions/mean_length": 639.5982666015625, "completions/mean_terminated_length": 572.7508544921875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 7.73724117818606, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 481739138.0, "reward": 0.4899553656578064, "reward_std": 0.21974080801010132, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 828 }, { "clip_ratio/high_max": 0.0014042636730664526, "clip_ratio/high_mean": 0.00040027656075380946, "clip_ratio/low_mean": 0.00028463592821026396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006849125002190704, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 602.0145263671875, "completions/mean_terminated_length": 526.318115234375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 7.746573344998541, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 482290863.0, "reward": 0.5959821939468384, "reward_std": 0.19888931512832642, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 829 }, { "clip_ratio/high_max": 0.0015067167068991694, "clip_ratio/high_mean": 0.00046176001535513933, "clip_ratio/low_mean": 0.0002626921981345731, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007244522140581466, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3329.0, "completions/mean_length": 617.8515625, "completions/mean_terminated_length": 558.6322631835938, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 7.755905511811024, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 482875834.0, "reward": 0.5256696939468384, "reward_std": 0.21147316694259644, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 830 }, { "clip_ratio/high_max": 0.0017874856630442082, "clip_ratio/high_mean": 0.0005050203840255563, "clip_ratio/low_mean": 0.000261511263374814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007665316479688045, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 628.4542846679688, "completions/mean_terminated_length": 573.413818359375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 7.765237678623506, "grad_norm": 0.10693359375, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 483468321.0, "reward": 0.551339328289032, "reward_std": 0.19426269829273224, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 831 }, { "clip_ratio/high_max": 0.0015487432574445847, "clip_ratio/high_mean": 0.0004490351029744488, "clip_ratio/low_mean": 0.00037584222036457504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008248773301602341, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 663.177490234375, "completions/mean_terminated_length": 596.7860717773438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.7745698454359875, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0148, "num_tokens": 484074440.0, "reward": 0.5022321939468384, "reward_std": 0.18464843928813934, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 832 }, { "clip_ratio/high_max": 0.0013228818834249978, "clip_ratio/high_mean": 0.00035903547359339427, "clip_ratio/low_mean": 0.00034873718891503813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007077726659190375, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3382.0, "completions/mean_length": 580.5714721679688, "completions/mean_terminated_length": 536.8768310546875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 7.783902012248469, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 484637768.0, "reward": 0.5613839626312256, "reward_std": 0.2009527087211609, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 833 }, { "clip_ratio/high_max": 0.0014881354891258525, "clip_ratio/high_mean": 0.0003941183897495648, "clip_ratio/low_mean": 0.00033160380439767323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007257221918735013, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2027.0, "completions/mean_length": 533.6517944335938, "completions/mean_terminated_length": 517.6771850585938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 7.793234179060951, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 485196632.0, "reward": 0.6082589626312256, "reward_std": 0.20316511392593384, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 834 }, { "clip_ratio/high_max": 0.0013740085478275432, "clip_ratio/high_mean": 0.0003876993386029426, "clip_ratio/low_mean": 0.00024111458844799927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006288139406933624, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 644.1317138671875, "completions/mean_terminated_length": 581.3704223632812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.802566345873433, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 485800878.0, "reward": 0.5636160969734192, "reward_std": 0.17799797654151917, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 835 }, { "clip_ratio/high_max": 0.0015182806509983493, "clip_ratio/high_mean": 0.0005289736777740472, "clip_ratio/low_mean": 0.0003237526958628223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008527263762516668, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 587.7824096679688, "completions/mean_terminated_length": 536.1325073242188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 7.811898512685914, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 486374955.0, "reward": 0.5848214626312256, "reward_std": 0.20464006066322327, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 836 }, { "clip_ratio/high_max": 0.0011973083001066698, "clip_ratio/high_mean": 0.00036646658156769263, "clip_ratio/low_mean": 0.00034716221784947265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007136288108995359, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 657.4777221679688, "completions/mean_terminated_length": 590.97607421875, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 7.821230679498396, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0187, "num_tokens": 486980327.0, "reward": 0.6049107313156128, "reward_std": 0.19186343252658844, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 837 }, { "clip_ratio/high_max": 0.0018675804612939828, "clip_ratio/high_mean": 0.0005910816469167912, "clip_ratio/low_mean": 0.00022987412319253053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008209557640839193, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3371.0, "completions/mean_length": 589.6373291015625, "completions/mean_terminated_length": 538.0147094726562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 7.830562846310878, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0145, "num_tokens": 487542538.0, "reward": 0.582589328289032, "reward_std": 0.19617946445941925, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 838 }, { "clip_ratio/high_max": 0.0017806277119234437, "clip_ratio/high_mean": 0.0005850000522968912, "clip_ratio/low_mean": 0.00038751102852074837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009725110694489558, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3113.0, "completions/mean_length": 554.8515625, "completions/mean_terminated_length": 522.9493408203125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 7.83989501312336, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 488102197.0, "reward": 0.5703125, "reward_std": 0.2285723090171814, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 839 }, { "clip_ratio/high_max": 0.001589763724950899, "clip_ratio/high_mean": 0.0004951285475272016, "clip_ratio/low_mean": 0.0003442013744461292, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008393299222007045, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2171.0, "completions/mean_length": 558.1283569335938, "completions/mean_terminated_length": 542.2634887695312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.849227179935841, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 488670776.0, "reward": 0.5569196939468384, "reward_std": 0.20023521780967712, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 840 }, { "clip_ratio/high_max": 0.0018499267971492372, "clip_ratio/high_mean": 0.0005242909760454495, "clip_ratio/low_mean": 0.0003799465844167571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009042375577337225, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 571.3114013671875, "completions/mean_terminated_length": 511.2996826171875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 7.858559346748323, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 489218631.0, "reward": 0.5948660969734192, "reward_std": 0.2321811467409134, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 841 }, { "clip_ratio/high_max": 0.0016465964463350247, "clip_ratio/high_mean": 0.0005022620300678682, "clip_ratio/low_mean": 0.00042260734466026406, "clip_ratio/low_min": 1.0539629329286981e-05, "clip_ratio/region_mean": 0.0009248693768313387, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 600.6574096679688, "completions/mean_terminated_length": 584.9832153320312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.867891513560805, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 489825004.0, "reward": 0.512276828289032, "reward_std": 0.22286362946033478, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 842 }, { "clip_ratio/high_max": 0.0017361270383844385, "clip_ratio/high_mean": 0.000524047238741332, "clip_ratio/low_mean": 0.0003687840110160323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008928312572606956, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 535.2902221679688, "completions/mean_terminated_length": 515.3086547851562, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 7.8772236803732865, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 490373312.0, "reward": 0.5736607313156128, "reward_std": 0.21714681386947632, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 843 }, { "clip_ratio/high_max": 0.0015636389744031476, "clip_ratio/high_mean": 0.00047459722964049433, "clip_ratio/low_mean": 0.0003665347717287659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008411319840888609, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 596.6752319335938, "completions/mean_terminated_length": 524.9351196289062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.886555847185768, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 490917357.0, "reward": 0.609375, "reward_std": 0.19576901197433472, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 844 }, { "clip_ratio/high_max": 0.0017464211996411905, "clip_ratio/high_mean": 0.0005997047271648626, "clip_ratio/low_mean": 0.0003516608542213362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009513655941191246, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3439.0, "completions/mean_length": 590.2154541015625, "completions/mean_terminated_length": 550.646728515625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 7.89588801399825, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 491502582.0, "reward": 0.551339328289032, "reward_std": 0.22849632799625397, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 845 }, { "clip_ratio/high_max": 0.0014702572334499564, "clip_ratio/high_mean": 0.0004401191754368483, "clip_ratio/low_mean": 0.0003532125886067661, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007933317669994722, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2273.0, "completions/mean_length": 566.5636596679688, "completions/mean_terminated_length": 538.7728271484375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.905220180810732, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 492078215.0, "reward": 0.5647321939468384, "reward_std": 0.2274732142686844, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 846 }, { "clip_ratio/high_max": 0.001387545023135317, "clip_ratio/high_mean": 0.00039264502515834465, "clip_ratio/low_mean": 0.00034647212044092157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007391171459403267, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 589.3326416015625, "completions/mean_terminated_length": 553.751953125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 7.914552347623214, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 492670225.0, "reward": 0.5334821939468384, "reward_std": 0.19756846129894257, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915629625320435, "step": 847 }, { "clip_ratio/high_max": 0.0015947225383570185, "clip_ratio/high_mean": 0.0005433679509678768, "clip_ratio/low_mean": 0.00039698560999568144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009403535850651679, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 631.8538208007812, "completions/mean_terminated_length": 556.8038330078125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.923884514435695, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 493250070.0, "reward": 0.5223214626312256, "reward_std": 0.221356600522995, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 848 }, { "clip_ratio/high_max": 0.0018653158276720205, "clip_ratio/high_mean": 0.0006010659449202649, "clip_ratio/low_mean": 0.00031246157220721216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009135275104199536, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3219.0, "completions/mean_length": 611.099365234375, "completions/mean_terminated_length": 559.792724609375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 7.933216681248178, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 493835599.0, "reward": 0.546875, "reward_std": 0.2418368011713028, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 849 }, { "clip_ratio/high_max": 0.0014806769668211928, "clip_ratio/high_mean": 0.0003980743955480648, "clip_ratio/low_mean": 0.00033593845381574283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007340128595387796, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3864.0, "completions/mean_length": 664.3660888671875, "completions/mean_terminated_length": 590.0205078125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 7.942548848060659, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 494447823.0, "reward": 0.520089328289032, "reward_std": 0.1957676112651825, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 850 }, { "clip_ratio/high_max": 0.0016430820141977165, "clip_ratio/high_mean": 0.0004774774679390248, "clip_ratio/low_mean": 0.0002885843025524082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007660617720830487, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 590.1685791015625, "completions/mean_terminated_length": 518.2949829101562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 7.951881014873141, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 494989038.0, "reward": 0.5145089626312256, "reward_std": 0.21782150864601135, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 851 }, { "clip_ratio/high_max": 0.0016200119262066437, "clip_ratio/high_mean": 0.0004887748599458064, "clip_ratio/low_mean": 0.0003507403177991364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008395151726290351, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 673.3984375, "completions/mean_terminated_length": 591.2559814453125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.961213181685623, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 495595203.0, "reward": 0.5267857313156128, "reward_std": 0.20497384667396545, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 852 }, { "clip_ratio/high_max": 0.001667117568104004, "clip_ratio/high_mean": 0.0005183360390219605, "clip_ratio/low_mean": 0.0003972172396515816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009155532734439475, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 601.5658569335938, "completions/mean_terminated_length": 558.1322021484375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 7.970545348498105, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 496170022.0, "reward": 0.6015625, "reward_std": 0.23897428810596466, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 853 }, { "clip_ratio/high_max": 0.00172699805443699, "clip_ratio/high_mean": 0.0005003368455618329, "clip_ratio/low_mean": 0.00039065806743110443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008909949046937982, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 600.7254638671875, "completions/mean_terminated_length": 557.2813720703125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 7.979877515310586, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0175, "num_tokens": 496748664.0, "reward": 0.5926339626312256, "reward_std": 0.22199669480323792, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 854 }, { "clip_ratio/high_max": 0.0017066129184968304, "clip_ratio/high_mean": 0.0005071540376775374, "clip_ratio/low_mean": 0.00031170578449746245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000818859824903484, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 618.8314819335938, "completions/mean_terminated_length": 571.630126953125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 7.989209682123068, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 497355097.0, "reward": 0.5055803656578064, "reward_std": 0.22594577074050903, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 855 }, { "clip_ratio/high_max": 0.0014975002650317037, "clip_ratio/high_mean": 0.0004614462129666208, "clip_ratio/low_mean": 0.00026589575156776846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007273419596458552, "completions/clipped_ratio": 0.0028409090909090606, "completions/max_length": 4096.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 651.2471923828125, "completions/mean_terminated_length": 641.4330444335938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 7.99854184893555, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 497941726.0, "reward": 0.578125, "reward_std": 0.19820895791053772, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 856 }, { "clip_ratio/high_max": 0.0017468438973082812, "clip_ratio/high_mean": 0.00045737842401649687, "clip_ratio/low_mean": 0.00021875535117032996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006761337695024849, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 632.7980346679688, "completions/mean_terminated_length": 573.8331909179688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 8.009332166812483, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 498541689.0, "reward": 0.5390625, "reward_std": 0.17788033187389374, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 857 }, { "clip_ratio/high_max": 0.0016137808270286769, "clip_ratio/high_mean": 0.0005060916400907445, "clip_ratio/low_mean": 0.00031017795913612645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008162696040017181, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 585.9140625, "completions/mean_terminated_length": 550.2987060546875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 8.018664333624963, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0194, "num_tokens": 499125660.0, "reward": 0.6127232313156128, "reward_std": 0.2267257571220398, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 858 }, { "clip_ratio/high_max": 0.0014067975353100337, "clip_ratio/high_mean": 0.0004142784707710234, "clip_ratio/low_mean": 0.000287919314928331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007021977953627356, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 677.8326416015625, "completions/mean_terminated_length": 579.72216796875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 8.027996500437446, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 499715190.0, "reward": 0.515625, "reward_std": 0.20098520815372467, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 859 }, { "clip_ratio/high_max": 0.0015272035479938495, "clip_ratio/high_mean": 0.0004498293205870141, "clip_ratio/low_mean": 0.0002851049209766643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000734934239744689, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 686.5045166015625, "completions/mean_terminated_length": 600.681884765625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 8.037328667249927, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 500320754.0, "reward": 0.5133928656578064, "reward_std": 0.20298008620738983, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 860 }, { "clip_ratio/high_max": 0.0017860611260402948, "clip_ratio/high_mean": 0.0004994106366211781, "clip_ratio/low_mean": 0.0002945915612144745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007940021796457586, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 623.6484375, "completions/mean_terminated_length": 556.4926147460938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.04666083406241, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 500894959.0, "reward": 0.5926339626312256, "reward_std": 0.18242643773555756, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 861 }, { "clip_ratio/high_max": 0.0016310338151015458, "clip_ratio/high_mean": 0.000524143026495949, "clip_ratio/low_mean": 0.0003509167015636194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008750597244215896, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 605.2600708007812, "completions/mean_terminated_length": 549.8515014648438, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 8.05599300087489, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 501461360.0, "reward": 0.5725446939468384, "reward_std": 0.2188713699579239, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 862 }, { "clip_ratio/high_max": 0.001550317799228651, "clip_ratio/high_mean": 0.00044595811823455733, "clip_ratio/low_mean": 0.0003373031987621289, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007832613182472414, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 603.9129638671875, "completions/mean_terminated_length": 576.4161987304688, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 8.065325167687373, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 502078130.0, "reward": 0.5323660969734192, "reward_std": 0.19813409447669983, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 863 }, { "clip_ratio/high_max": 0.0015432717054864042, "clip_ratio/high_mean": 0.0004493577710036334, "clip_ratio/low_mean": 0.00025994005727625336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007092978230502922, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 638.0011596679688, "completions/mean_terminated_length": 575.12841796875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 8.074657334499854, "grad_norm": 0.109375, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 502670131.0, "reward": 0.5446428656578064, "reward_std": 0.1755484640598297, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 864 }, { "clip_ratio/high_max": 0.0013704040520678973, "clip_ratio/high_mean": 0.0003949880742766254, "clip_ratio/low_mean": 0.00026574883054308884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006607368941331515, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2025.0, "completions/mean_length": 585.3627319335938, "completions/mean_terminated_length": 549.7418212890625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.083989501312336, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 503255176.0, "reward": 0.535714328289032, "reward_std": 0.19892504811286926, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 865 }, { "clip_ratio/high_max": 0.001541744650239707, "clip_ratio/high_mean": 0.0004173470326804818, "clip_ratio/low_mean": 0.0003949804329295148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008123274710669648, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3504.0, "completions/mean_length": 554.9542846679688, "completions/mean_terminated_length": 519.0247802734375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 8.093321668124817, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 503801191.0, "reward": 0.5546875, "reward_std": 0.2021905481815338, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 866 }, { "clip_ratio/high_max": 0.0014549273764714599, "clip_ratio/high_mean": 0.0004922729364125189, "clip_ratio/low_mean": 0.0003672184179777105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008594913520028058, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3110.0, "completions/mean_length": 664.5748291015625, "completions/mean_terminated_length": 586.231689453125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.1026538349373, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.005, "num_tokens": 504395434.0, "reward": 0.515625, "reward_std": 0.2237556129693985, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 867 }, { "clip_ratio/high_max": 0.001549756049826101, "clip_ratio/high_mean": 0.0004758235231747676, "clip_ratio/low_mean": 0.00033826272351689113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008140862391883275, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 652.9654541015625, "completions/mean_terminated_length": 574.3572998046875, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 8.11198600174978, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 504996371.0, "reward": 0.546875, "reward_std": 0.22138871252536774, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 868 }, { "clip_ratio/high_max": 0.0018592611049825791, "clip_ratio/high_mean": 0.0006670099746770575, "clip_ratio/low_mean": 0.0003113480520369194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009783580208022613, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 620.5167846679688, "completions/mean_terminated_length": 573.3382568359375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 8.121318168562263, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.01, "num_tokens": 505585362.0, "reward": 0.5345982313156128, "reward_std": 0.22462351620197296, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 869 }, { "clip_ratio/high_max": 0.001431892036634963, "clip_ratio/high_mean": 0.00047108674198170775, "clip_ratio/low_mean": 0.0002798161339114813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007509028839649545, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3136.0, "completions/mean_length": 600.5971069335938, "completions/mean_terminated_length": 565.1307373046875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 8.130650335374744, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 506172017.0, "reward": 0.5524553656578064, "reward_std": 0.1953885406255722, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 870 }, { "clip_ratio/high_max": 0.001366339684864215, "clip_ratio/high_mean": 0.00037103207421296247, "clip_ratio/low_mean": 0.00021886720855945896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005898992758375243, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 616.9107666015625, "completions/mean_terminated_length": 561.6870727539062, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 8.139982502187227, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 506762737.0, "reward": 0.5725446939468384, "reward_std": 0.14530527591705322, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 871 }, { "clip_ratio/high_max": 0.001973413000087021, "clip_ratio/high_mean": 0.000655198623007891, "clip_ratio/low_mean": 0.00040153223619654455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010567308781901374, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 610.005615234375, "completions/mean_terminated_length": 550.6527099609375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 8.149314668999708, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 507344262.0, "reward": 0.5502232313156128, "reward_std": 0.2408924549818039, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 872 }, { "clip_ratio/high_max": 0.001684861388639547, "clip_ratio/high_mean": 0.0004855446362626026, "clip_ratio/low_mean": 0.00039934766266469524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000884892295289319, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 620.5480346679688, "completions/mean_terminated_length": 545.2531127929688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 8.15864683581219, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 507915161.0, "reward": 0.5959821939468384, "reward_std": 0.22131451964378357, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 873 }, { "clip_ratio/high_max": 0.0015606476281391224, "clip_ratio/high_mean": 0.00042545598171273014, "clip_ratio/low_mean": 0.00034125207560009585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007667080399187398, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 680.0346069335938, "completions/mean_terminated_length": 577.9483032226562, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 8.167979002624673, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 508510536.0, "reward": 0.5111607313156128, "reward_std": 0.21319912374019623, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 874 }, { "clip_ratio/high_max": 0.0017300440740655176, "clip_ratio/high_mean": 0.0005284130988911784, "clip_ratio/low_mean": 0.0004068345499490533, "clip_ratio/low_min": 9.404152478964534e-06, "clip_ratio/region_mean": 0.0009352476290587219, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 580.1183471679688, "completions/mean_terminated_length": 536.4180908203125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 8.177311169437154, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 509065762.0, "reward": 0.551339328289032, "reward_std": 0.23045022785663605, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 875 }, { "clip_ratio/high_max": 0.0018924691394204274, "clip_ratio/high_mean": 0.0005723708322875609, "clip_ratio/low_mean": 0.0003268855334681575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008992563516585506, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 545.927490234375, "completions/mean_terminated_length": 501.8022766113281, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 8.186643336249636, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 509602385.0, "reward": 0.6350446939468384, "reward_std": 0.1925356090068817, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 876 }, { "clip_ratio/high_max": 0.0017362269009026932, "clip_ratio/high_mean": 0.0005923977596467012, "clip_ratio/low_mean": 0.00032261033845770726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009150081050393055, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3127.0, "completions/mean_length": 588.0670166015625, "completions/mean_terminated_length": 544.465576171875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 8.195975503062117, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 510183957.0, "reward": 0.5625, "reward_std": 0.2516414523124695, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 877 }, { "clip_ratio/high_max": 0.0015896023887762567, "clip_ratio/high_mean": 0.0004748531939640088, "clip_ratio/low_mean": 0.00027660682280838955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007514600101785618, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 615.380615234375, "completions/mean_terminated_length": 564.1370239257812, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 8.2053076698746, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 510775410.0, "reward": 0.5479910969734192, "reward_std": 0.20804892480373383, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 878 }, { "clip_ratio/high_max": 0.00159483886363887, "clip_ratio/high_mean": 0.0004746732116700514, "clip_ratio/low_mean": 0.00036302139142208034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008376946052521816, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 620.8092041015625, "completions/mean_terminated_length": 565.6473999023438, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 8.21463983668708, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 511366687.0, "reward": 0.5758928656578064, "reward_std": 0.2101939469575882, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 879 }, { "clip_ratio/high_max": 0.001336234889095067, "clip_ratio/high_mean": 0.0004071303610544419, "clip_ratio/low_mean": 0.00035937676511821337, "clip_ratio/low_min": 1.193659318232676e-05, "clip_ratio/region_mean": 0.0007665071143492241, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3234.0, "completions/mean_length": 662.5580444335938, "completions/mean_terminated_length": 608.0589599609375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.223972003499563, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 511992147.0, "reward": 0.4933035969734192, "reward_std": 0.2275192141532898, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 880 }, { "clip_ratio/high_max": 0.0013866669723938685, "clip_ratio/high_mean": 0.0004217528789922653, "clip_ratio/low_mean": 0.00035159288358954655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007733457623544382, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 579.2767944335938, "completions/mean_terminated_length": 539.5846557617188, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 8.233304170312044, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 512565643.0, "reward": 0.5334821939468384, "reward_std": 0.20963124930858612, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 881 }, { "clip_ratio/high_max": 0.0015374592385342112, "clip_ratio/high_mean": 0.00041051021048588154, "clip_ratio/low_mean": 0.0003006694947202959, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007111797053767077, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 630.0580444335938, "completions/mean_terminated_length": 571.0465698242188, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 8.242636337124527, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0128, "num_tokens": 513172247.0, "reward": 0.5066964626312256, "reward_std": 0.1846049576997757, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 882 }, { "clip_ratio/high_max": 0.001493399352511915, "clip_ratio/high_mean": 0.00046457714313419274, "clip_ratio/low_mean": 0.0003219285948716788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007865057505114237, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 592.3683471679688, "completions/mean_terminated_length": 540.7859497070312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 8.251968503937007, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 513739473.0, "reward": 0.5234375, "reward_std": 0.20418290793895721, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 883 }, { "clip_ratio/high_max": 0.0017472907547926297, "clip_ratio/high_mean": 0.000569048528291205, "clip_ratio/low_mean": 0.00032808198898237606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008971305105660576, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 614.0592041015625, "completions/mean_terminated_length": 558.790283203125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 8.26130067074949, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 514315126.0, "reward": 0.546875, "reward_std": 0.23694662749767303, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 884 }, { "clip_ratio/high_max": 0.001324815347288677, "clip_ratio/high_mean": 0.00040013637385527545, "clip_ratio/low_mean": 0.00033200555776602414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007321419179788791, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 603.4777221679688, "completions/mean_terminated_length": 556.06787109375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 8.27063283756197, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 514904130.0, "reward": 0.5580357313156128, "reward_std": 0.1940016895532608, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 885 }, { "clip_ratio/high_max": 0.00172890922294755, "clip_ratio/high_mean": 0.0005497201705111365, "clip_ratio/low_mean": 0.0003320003434055252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000881720520737872, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 633.2824096679688, "completions/mean_terminated_length": 578.318603515625, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 8.279965004374453, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 515504991.0, "reward": 0.5290178656578064, "reward_std": 0.22721359133720398, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 886 }, { "clip_ratio/high_max": 0.0017464477114117472, "clip_ratio/high_mean": 0.0005554093470436783, "clip_ratio/low_mean": 0.000385710791533711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009411201281182002, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 611.4553833007812, "completions/mean_terminated_length": 556.1451416015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 8.289297171186934, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 516080047.0, "reward": 0.5792410969734192, "reward_std": 0.2399599701166153, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 887 }, { "clip_ratio/high_max": 0.0013682266153409728, "clip_ratio/high_mean": 0.00042073265478848043, "clip_ratio/low_mean": 0.00032234091304417234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007430735649904818, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 623.6864013671875, "completions/mean_terminated_length": 560.5534057617188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.298629337999417, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 516663334.0, "reward": 0.5580357313156128, "reward_std": 0.19422556459903717, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 888 }, { "clip_ratio/high_max": 0.0017015608918882208, "clip_ratio/high_mean": 0.0005660527640429791, "clip_ratio/low_mean": 0.00033774187238577724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009037946379066852, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 571.3158569335938, "completions/mean_terminated_length": 547.553955078125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.307961504811898, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 517237145.0, "reward": 0.629464328289032, "reward_std": 0.22267001867294312, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 889 }, { "clip_ratio/high_max": 0.001672685058110801, "clip_ratio/high_mean": 0.0005197747411784803, "clip_ratio/low_mean": 0.00034178967371190083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008615644155725022, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 606.5770263671875, "completions/mean_terminated_length": 575.1408081054688, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 8.31729367162438, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 517841462.0, "reward": 0.5580357313156128, "reward_std": 0.20351210236549377, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 890 }, { "clip_ratio/high_max": 0.0017001188316498883, "clip_ratio/high_mean": 0.0004989069307157479, "clip_ratio/low_mean": 0.0003887211655637657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008876280899130506, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3648.0, "completions/mean_length": 647.0201416015625, "completions/mean_terminated_length": 568.2762451171875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 8.326625838436861, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 518435216.0, "reward": 0.5234375, "reward_std": 0.2187972068786621, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 891 }, { "clip_ratio/high_max": 0.0013056448278803146, "clip_ratio/high_mean": 0.0003652055950169597, "clip_ratio/low_mean": 0.00026464711004337005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000629852710517298, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 560.3660888671875, "completions/mean_terminated_length": 520.4605102539062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.335958005249344, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 518984328.0, "reward": 0.5245535969734192, "reward_std": 0.16578476130962372, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 892 }, { "clip_ratio/high_max": 0.0015380546774395043, "clip_ratio/high_mean": 0.0004793887990217627, "clip_ratio/low_mean": 0.0004007861812169722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008801749663689407, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 621.4654541015625, "completions/mean_terminated_length": 554.267333984375, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 8.345290172061826, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 519552665.0, "reward": 0.5390625, "reward_std": 0.2326594591140747, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 893 }, { "clip_ratio/high_max": 0.001598576205651625, "clip_ratio/high_mean": 0.0004480711572796281, "clip_ratio/low_mean": 0.00033639293883425125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007844641063456947, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 589.677490234375, "completions/mean_terminated_length": 534.0215454101562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.354622338874307, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 520117168.0, "reward": 0.559151828289032, "reward_std": 0.1829584687948227, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 894 }, { "clip_ratio/high_max": 0.0017098172429541592, "clip_ratio/high_mean": 0.0004787050828554129, "clip_ratio/low_mean": 0.0003222945865672955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008009996477085224, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3288.0, "completions/mean_length": 630.203125, "completions/mean_terminated_length": 587.1254272460938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.36395450568679, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 520720246.0, "reward": 0.5133928656578064, "reward_std": 0.23247580230236053, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 895 }, { "clip_ratio/high_max": 0.001812557320590713, "clip_ratio/high_mean": 0.0005399327089889994, "clip_ratio/low_mean": 0.00036290496541369066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009028376880451106, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 595.1596069335938, "completions/mean_terminated_length": 559.6380615234375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 8.37328667249927, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 521300029.0, "reward": 0.5736607313156128, "reward_std": 0.22007529437541962, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 896 }, { "clip_ratio/high_max": 0.0017018866819853429, "clip_ratio/high_mean": 0.0005414952763658221, "clip_ratio/low_mean": 0.00036395597271621227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009054512547663762, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3087.0, "completions/mean_length": 579.1640625, "completions/mean_terminated_length": 523.3412475585938, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 8.382618839311753, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0156, "num_tokens": 521862336.0, "reward": 0.5524553656578064, "reward_std": 0.23157496750354767, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 897 }, { "clip_ratio/high_max": 0.0017398423597114743, "clip_ratio/high_mean": 0.0004925803399373763, "clip_ratio/low_mean": 0.0002706510886127944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007632314345755731, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3169.0, "completions/mean_length": 619.138427734375, "completions/mean_terminated_length": 543.81298828125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.391951006124234, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 522416276.0, "reward": 0.5926339626312256, "reward_std": 0.17652484774589539, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 898 }, { "clip_ratio/high_max": 0.0018027021014859201, "clip_ratio/high_mean": 0.0005538426175917266, "clip_ratio/low_mean": 0.000342303311242631, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008961459388956428, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 581.357177734375, "completions/mean_terminated_length": 517.4545288085938, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 8.401283172936717, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 522963740.0, "reward": 0.6149553656578064, "reward_std": 0.194911390542984, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 899 }, { "clip_ratio/high_max": 0.001807417580494075, "clip_ratio/high_mean": 0.0005836440525399667, "clip_ratio/low_mean": 0.00032918493911893165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009128289784712251, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 598.7299194335938, "completions/mean_terminated_length": 539.18505859375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.410615339749198, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0139, "num_tokens": 523519162.0, "reward": 0.5881696939468384, "reward_std": 0.24081578850746155, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 900 }, { "clip_ratio/high_max": 0.001764618269589846, "clip_ratio/high_mean": 0.0006486259676421469, "clip_ratio/low_mean": 0.00039002792368592054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001038653904288367, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3339.0, "completions/mean_length": 594.5267944335938, "completions/mean_terminated_length": 566.9561767578125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 8.41994750656168, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 524106306.0, "reward": 0.5714285969734192, "reward_std": 0.27230456471443176, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 901 }, { "clip_ratio/high_max": 0.0017868690065370174, "clip_ratio/high_mean": 0.0005425563986136694, "clip_ratio/low_mean": 0.00028931020460731816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008318666150444187, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 599.1808471679688, "completions/mean_terminated_length": 535.6022338867188, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 8.429279673374161, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 524658764.0, "reward": 0.6383928656578064, "reward_std": 0.2141416221857071, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 902 }, { "clip_ratio/high_max": 0.0019628958307293942, "clip_ratio/high_mean": 0.0005937970076956844, "clip_ratio/low_mean": 0.00027018056778160826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008639775796837057, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 624.4788208007812, "completions/mean_terminated_length": 541.1622924804688, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 8.438611840186644, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 525219713.0, "reward": 0.5636160969734192, "reward_std": 0.20820976793766022, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 903 }, { "clip_ratio/high_max": 0.0018190665196016198, "clip_ratio/high_mean": 0.0005865290181645832, "clip_ratio/low_mean": 0.00030423904979670624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008907680658012396, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 621.4375, "completions/mean_terminated_length": 582.2212524414062, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 8.447944006999125, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0116, "num_tokens": 525832361.0, "reward": 0.5613839626312256, "reward_std": 0.2127137929201126, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 904 }, { "clip_ratio/high_max": 0.0014852175208943663, "clip_ratio/high_mean": 0.0004753192365569703, "clip_ratio/low_mean": 0.0003577102679628297, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008330295122505049, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 591.8460083007812, "completions/mean_terminated_length": 564.2542114257812, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 8.457276173811607, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 526429847.0, "reward": 0.5948660969734192, "reward_std": 0.22135479748249054, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 905 }, { "clip_ratio/high_max": 0.0015947114407026675, "clip_ratio/high_mean": 0.0005013735790271312, "clip_ratio/low_mean": 0.0003463989635292819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008477725486955023, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 601.2801513671875, "completions/mean_terminated_length": 545.8084106445312, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 8.466608340624088, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 526997170.0, "reward": 0.582589328289032, "reward_std": 0.2290155589580536, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 906 }, { "clip_ratio/high_max": 0.0015031150460345089, "clip_ratio/high_mean": 0.0004442498941443773, "clip_ratio/low_mean": 0.00031960154296939436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007638514266545826, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 638.3671875, "completions/mean_terminated_length": 571.4959716796875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 8.47594050743657, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 527584963.0, "reward": 0.515625, "reward_std": 0.21602025628089905, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 907 }, { "clip_ratio/high_max": 0.0016060644757089904, "clip_ratio/high_mean": 0.00042741316269712115, "clip_ratio/low_mean": 0.0004014176520286128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008288308108603815, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 672.1986694335938, "completions/mean_terminated_length": 598.0227661132812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.485272674249051, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 528192661.0, "reward": 0.527901828289032, "reward_std": 0.20783139765262604, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 908 }, { "clip_ratio/high_max": 0.0017307647158304462, "clip_ratio/high_mean": 0.0005103231901557592, "clip_ratio/low_mean": 0.0003590532381849698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008693764434610785, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 589.1830444335938, "completions/mean_terminated_length": 545.5955200195312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 8.494604841061534, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 528761569.0, "reward": 0.5546875, "reward_std": 0.21060903370380402, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 909 }, { "clip_ratio/high_max": 0.0014862043444736628, "clip_ratio/high_mean": 0.000418830484704813, "clip_ratio/low_mean": 0.0003197139348003475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007385444196188473, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 613.7210083007812, "completions/mean_terminated_length": 574.4176025390625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.503937007874015, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 529370567.0, "reward": 0.551339328289032, "reward_std": 0.1918955147266388, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 910 }, { "clip_ratio/high_max": 0.0019107366424577776, "clip_ratio/high_mean": 0.0005375444156925369, "clip_ratio/low_mean": 0.0003362418125334443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008737862326597678, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 535.8973388671875, "completions/mean_terminated_length": 503.8243408203125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 8.513269174686497, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 529906819.0, "reward": 0.637276828289032, "reward_std": 0.21214716136455536, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 911 }, { "clip_ratio/high_max": 0.001241295964064193, "clip_ratio/high_mean": 0.0003295468716260075, "clip_ratio/low_mean": 0.0003396690580075301, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006692159340673243, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 642.2723388671875, "completions/mean_terminated_length": 591.4246826171875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 8.52260134149898, "grad_norm": 0.107421875, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 530523247.0, "reward": 0.4743303656578064, "reward_std": 0.18013623356819153, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 912 }, { "clip_ratio/high_max": 0.0017086550105887, "clip_ratio/high_mean": 0.0005698565296370361, "clip_ratio/low_mean": 0.00038646468397018907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009563212051943992, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 650.2902221679688, "completions/mean_terminated_length": 575.6396484375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 8.531933508311461, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 531113371.0, "reward": 0.5245535969734192, "reward_std": 0.2622414231300354, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 913 }, { "clip_ratio/high_max": 0.001687218486040365, "clip_ratio/high_mean": 0.0005116880656714784, "clip_ratio/low_mean": 0.00029640564264354907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008080937104750774, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 553.7745971679688, "completions/mean_terminated_length": 517.8331298828125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 8.541265675123944, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0119, "num_tokens": 531666953.0, "reward": 0.598214328289032, "reward_std": 0.2105737179517746, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 914 }, { "clip_ratio/high_max": 0.0017619947393541224, "clip_ratio/high_mean": 0.0005559896108024986, "clip_ratio/low_mean": 0.00040431408615404507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009603036887710914, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1687.0, "completions/mean_length": 547.2835083007812, "completions/mean_terminated_length": 523.3595581054688, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 8.550597841936424, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 532221319.0, "reward": 0.5848214626312256, "reward_std": 0.24525383114814758, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 915 }, { "clip_ratio/high_max": 0.0018598817950987723, "clip_ratio/high_mean": 0.000556495856926631, "clip_ratio/low_mean": 0.0002807374728490686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008372333286388312, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3952.0, "completions/mean_length": 689.4676513671875, "completions/mean_terminated_length": 639.3148193359375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 8.559930008748907, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 532884850.0, "reward": 0.463169664144516, "reward_std": 0.20293870568275452, "rewards/verify_math_reward/mean": 0.4631696343421936, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 916 }, { "clip_ratio/high_max": 0.0014546530601364793, "clip_ratio/high_mean": 0.0003646654126896465, "clip_ratio/low_mean": 0.0003260940266045509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006907594361109659, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 644.5960083007812, "completions/mean_terminated_length": 569.8220825195312, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 8.569262175561388, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 533472992.0, "reward": 0.5323660969734192, "reward_std": 0.18359534442424774, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 917 }, { "clip_ratio/high_max": 0.0015795235085533932, "clip_ratio/high_mean": 0.00046314455744322913, "clip_ratio/low_mean": 0.0003992133807741993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008623579442428309, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 577.5100708007812, "completions/mean_terminated_length": 553.7899169921875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 8.57859434237387, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 534059841.0, "reward": 0.5245535969734192, "reward_std": 0.2202172726392746, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 918 }, { "clip_ratio/high_max": 0.0017360732308588922, "clip_ratio/high_mean": 0.0004725495975890226, "clip_ratio/low_mean": 0.0003561232105084855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008286728188977577, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 590.1506958007812, "completions/mean_terminated_length": 534.5022583007812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.587926509186351, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 534626792.0, "reward": 0.578125, "reward_std": 0.21685144305229187, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 919 }, { "clip_ratio/high_max": 0.001625804466129921, "clip_ratio/high_mean": 0.0004773313697796766, "clip_ratio/low_mean": 0.0003072223100843985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007845536779313989, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 633.7678833007812, "completions/mean_terminated_length": 578.8118286132812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 8.597258675998834, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0061, "num_tokens": 535220032.0, "reward": 0.5055803656578064, "reward_std": 0.2061368077993393, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 920 }, { "clip_ratio/high_max": 0.0016396561859437497, "clip_ratio/high_mean": 0.00047457321988986223, "clip_ratio/low_mean": 0.00030810346515863785, "clip_ratio/low_min": 6.510416824312415e-06, "clip_ratio/region_mean": 0.0007826766786820372, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3387.0, "completions/mean_length": 604.4319458007812, "completions/mean_terminated_length": 540.9488525390625, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 8.606590842811315, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 535778747.0, "reward": 0.574776828289032, "reward_std": 0.20955273509025574, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 921 }, { "clip_ratio/high_max": 0.001730506738567783, "clip_ratio/high_mean": 0.0004712760774054914, "clip_ratio/low_mean": 0.00031183343355678517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007831095163055579, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 622.1160888671875, "completions/mean_terminated_length": 570.9716796875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 8.615923009623797, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 536367027.0, "reward": 0.5089285969734192, "reward_std": 0.21117106080055237, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 922 }, { "clip_ratio/high_max": 0.0016254750898951897, "clip_ratio/high_mean": 0.0004993629991076887, "clip_ratio/low_mean": 0.0003328687778321182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008322317871716223, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 619.6920166015625, "completions/mean_terminated_length": 564.512451171875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.625255176436278, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 536961111.0, "reward": 0.5636160969734192, "reward_std": 0.21432778239250183, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 923 }, { "clip_ratio/high_max": 0.001721602246107068, "clip_ratio/high_mean": 0.0005219461934302672, "clip_ratio/low_mean": 0.00037469376366061624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008966399600467412, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3136.0, "completions/mean_length": 544.1953125, "completions/mean_terminated_length": 508.15667724609375, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 8.63458734324876, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 537504166.0, "reward": 0.5256696939468384, "reward_std": 0.2325085699558258, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 924 }, { "clip_ratio/high_max": 0.0015194864399745711, "clip_ratio/high_mean": 0.0003873891405419272, "clip_ratio/low_mean": 0.0002880939252918324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006754830624231545, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3740.0, "completions/mean_length": 638.0569458007812, "completions/mean_terminated_length": 567.1651611328125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.643919510061242, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 538094113.0, "reward": 0.5178571939468384, "reward_std": 0.17461372911930084, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 925 }, { "clip_ratio/high_max": 0.0017375241059198743, "clip_ratio/high_mean": 0.0005021859069529455, "clip_ratio/low_mean": 0.0003631916688391357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008653775994389434, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3134.0, "completions/mean_length": 587.3660888671875, "completions/mean_terminated_length": 547.7652587890625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 8.653251676873724, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0153, "num_tokens": 538667313.0, "reward": 0.559151828289032, "reward_std": 0.20756672322750092, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 926 }, { "clip_ratio/high_max": 0.0016017565640140674, "clip_ratio/high_mean": 0.0005030020404319657, "clip_ratio/low_mean": 0.0002697615398119524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007727635820629075, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 576.4017944335938, "completions/mean_terminated_length": 516.4767456054688, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 8.662583843686207, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 539213545.0, "reward": 0.5703125, "reward_std": 0.18716463446617126, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 927 }, { "clip_ratio/high_max": 0.0015024595559225418, "clip_ratio/high_mean": 0.00048641150249295606, "clip_ratio/low_mean": 0.0004322054810472764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009186169900203822, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 636.2377319335938, "completions/mean_terminated_length": 565.3086547851562, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 8.671916010498688, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 539798990.0, "reward": 0.527901828289032, "reward_std": 0.23334342241287231, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 928 }, { "clip_ratio/high_max": 0.0017354858355247416, "clip_ratio/high_mean": 0.0004994459477529745, "clip_ratio/low_mean": 0.00038305372936520143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008824996702969656, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 636.1439819335938, "completions/mean_terminated_length": 565.2130126953125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.68124817731117, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0171, "num_tokens": 540383943.0, "reward": 0.5569196939468384, "reward_std": 0.21733295917510986, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 929 }, { "clip_ratio/high_max": 0.001857759898484801, "clip_ratio/high_mean": 0.0005302980175656558, "clip_ratio/low_mean": 0.0004215425344682444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009518405659036944, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 608.7756958007812, "completions/mean_terminated_length": 549.40185546875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 8.690580344123651, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 540954278.0, "reward": 0.5848214626312256, "reward_std": 0.23435191810131073, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 930 }, { "clip_ratio/high_max": 0.0019121235836792039, "clip_ratio/high_mean": 0.0005733485695600393, "clip_ratio/low_mean": 0.0002883373324493732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000861685887684871, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 681.7667846679688, "completions/mean_terminated_length": 611.7711181640625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 8.699912510936134, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 541584853.0, "reward": 0.5089285969734192, "reward_std": 0.214814230799675, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 931 }, { "clip_ratio/high_max": 0.0017335543361696182, "clip_ratio/high_mean": 0.0005107215254156472, "clip_ratio/low_mean": 0.00042112849746445136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009318500187873724, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2328.0, "completions/mean_length": 611.8170166015625, "completions/mean_terminated_length": 552.4949340820312, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 8.709244677748615, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 542165209.0, "reward": 0.5368303656578064, "reward_std": 0.21086977422237396, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 932 }, { "clip_ratio/high_max": 0.0015037285702419467, "clip_ratio/high_mean": 0.0003934235918450213, "clip_ratio/low_mean": 0.00019559146517167392, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005890150528102822, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2591.0, "completions/mean_length": 662.232177734375, "completions/mean_terminated_length": 599.7999877929688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.718576844561097, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 542781721.0, "reward": 0.5658482313156128, "reward_std": 0.1543617695569992, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 933 }, { "clip_ratio/high_max": 0.0014332880964502692, "clip_ratio/high_mean": 0.000394102932318674, "clip_ratio/low_mean": 0.0003036897494439472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006977926789204503, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 633.5926513671875, "completions/mean_terminated_length": 590.5570678710938, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 8.727909011373578, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 543390532.0, "reward": 0.5011160969734192, "reward_std": 0.1767512857913971, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 934 }, { "clip_ratio/high_max": 0.0018787748595059384, "clip_ratio/high_mean": 0.0005562072410612018, "clip_ratio/low_mean": 0.000399612187038656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009558194296914735, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 611.9888916015625, "completions/mean_terminated_length": 544.6074829101562, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 8.73724117818606, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0181, "num_tokens": 543948706.0, "reward": 0.5602678656578064, "reward_std": 0.21763533353805542, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 935 }, { "clip_ratio/high_max": 0.0017085592990042642, "clip_ratio/high_mean": 0.0005232095326164199, "clip_ratio/low_mean": 0.00034663820576952276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008698477295183693, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 607.091552734375, "completions/mean_terminated_length": 551.7120361328125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 8.746573344998541, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 544523428.0, "reward": 0.5569196939468384, "reward_std": 0.1906581073999405, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 936 }, { "clip_ratio/high_max": 0.0014151353498164099, "clip_ratio/high_mean": 0.0003932183233246178, "clip_ratio/low_mean": 0.00035517708670340653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007483954195777187, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 596.171875, "completions/mean_terminated_length": 560.66064453125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 8.755905511811024, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0133, "num_tokens": 545113950.0, "reward": 0.5881696939468384, "reward_std": 0.2212025374174118, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 937 }, { "clip_ratio/high_max": 0.0016391848967032274, "clip_ratio/high_mean": 0.0005453678943467821, "clip_ratio/low_mean": 0.0004193331365058839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009647010410844814, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 668.4866333007812, "completions/mean_terminated_length": 582.2105102539062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.765237678623505, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 545709474.0, "reward": 0.5390625, "reward_std": 0.23616454005241394, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 938 }, { "clip_ratio/high_max": 0.0018827369021892082, "clip_ratio/high_mean": 0.0005811306814393902, "clip_ratio/low_mean": 0.00035539788541427697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000936528564125183, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3390.0, "completions/mean_length": 523.2377319335938, "completions/mean_terminated_length": 491.0506896972656, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 8.774569845435988, "grad_norm": 0.15234375, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 546229503.0, "reward": 0.621651828289032, "reward_std": 0.22936254739761353, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 939 }, { "clip_ratio/high_max": 0.0014341303749461076, "clip_ratio/high_mean": 0.0003922824550954829, "clip_ratio/low_mean": 0.0003290212607680587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007213037156361679, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3679.0, "completions/mean_length": 675.1517944335938, "completions/mean_terminated_length": 560.7289428710938, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 8.783902012248468, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 546809335.0, "reward": 0.5424107313156128, "reward_std": 0.17394223809242249, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 940 }, { "clip_ratio/high_max": 0.001455961568353814, "clip_ratio/high_mean": 0.00040871775604500726, "clip_ratio/low_mean": 0.00029301418430804915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007017319389888144, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 573.099365234375, "completions/mean_terminated_length": 541.3615112304688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 8.793234179060951, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 547369128.0, "reward": 0.6004464626312256, "reward_std": 0.205833300948143, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 941 }, { "clip_ratio/high_max": 0.001447814769562683, "clip_ratio/high_mean": 0.0004180887731308758, "clip_ratio/low_mean": 0.00031519942911018006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007332881918955536, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 619.9967041015625, "completions/mean_terminated_length": 548.734619140625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.802566345873432, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 547936013.0, "reward": 0.543526828289032, "reward_std": 0.1766757071018219, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 942 }, { "clip_ratio/high_max": 0.0017402688117726939, "clip_ratio/high_mean": 0.0005268613151656609, "clip_ratio/low_mean": 0.0003360545338182419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008629158592157182, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 568.5, "completions/mean_terminated_length": 544.7191162109375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 8.811898512685914, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 548510949.0, "reward": 0.59375, "reward_std": 0.21969692409038544, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 943 }, { "clip_ratio/high_max": 0.001513290316779603, "clip_ratio/high_mean": 0.0004044888418093251, "clip_ratio/low_mean": 0.0002366639280353411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006411527729142108, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2697.0, "completions/mean_length": 634.3314819335938, "completions/mean_terminated_length": 579.3843383789062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.821230679498395, "grad_norm": 0.1044921875, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 549107558.0, "reward": 0.5636160969734192, "reward_std": 0.17803147435188293, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 944 }, { "clip_ratio/high_max": 0.0013947562583780382, "clip_ratio/high_mean": 0.00038034964654798387, "clip_ratio/low_mean": 0.0002776464652924915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000657996119116433, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 652.15625, "completions/mean_terminated_length": 577.546142578125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.830562846310878, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0109, "num_tokens": 549697938.0, "reward": 0.5189732313156128, "reward_std": 0.18644829094409943, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 945 }, { "clip_ratio/high_max": 0.0016548984131077304, "clip_ratio/high_mean": 0.0004959760540259595, "clip_ratio/low_mean": 0.0003435495328858451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008395255986215489, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2153.0, "completions/mean_length": 575.107177734375, "completions/mean_terminated_length": 519.219970703125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 8.83989501312336, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 550239634.0, "reward": 0.5993303656578064, "reward_std": 0.20339125394821167, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 946 }, { "clip_ratio/high_max": 0.0016845199843373848, "clip_ratio/high_mean": 0.0005099602622067323, "clip_ratio/low_mean": 0.0003712894717864401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008812497280814569, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 640.068115234375, "completions/mean_terminated_length": 581.22705078125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.849227179935841, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0171, "num_tokens": 550834215.0, "reward": 0.5345982313156128, "reward_std": 0.21740712225437164, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 947 }, { "clip_ratio/high_max": 0.0016339108569809468, "clip_ratio/high_mean": 0.0005902967563997663, "clip_ratio/low_mean": 0.00034587329855639837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009361700476802071, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3251.0, "completions/mean_length": 654.1239013671875, "completions/mean_terminated_length": 595.5221557617188, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 8.858559346748324, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 551448990.0, "reward": 0.5535714626312256, "reward_std": 0.2292533665895462, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 948 }, { "clip_ratio/high_max": 0.0015780063531565247, "clip_ratio/high_mean": 0.0004718756790680345, "clip_ratio/low_mean": 0.00032239909978670767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007942747856759524, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3677.0, "completions/mean_length": 621.9564819335938, "completions/mean_terminated_length": 558.7920532226562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.867891513560805, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0164, "num_tokens": 552027495.0, "reward": 0.5390625, "reward_std": 0.22352877259254456, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 949 }, { "clip_ratio/high_max": 0.0014388215749931987, "clip_ratio/high_mean": 0.0004559162789519178, "clip_ratio/low_mean": 0.0002821906871304236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007381069628991099, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 616.154052734375, "completions/mean_terminated_length": 568.9163208007812, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 8.877223680373287, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 552620473.0, "reward": 0.5558035969734192, "reward_std": 0.21075919270515442, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 950 }, { "clip_ratio/high_max": 0.0016259537251244183, "clip_ratio/high_mean": 0.0005211812249399372, "clip_ratio/low_mean": 0.0003111157900548278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008322970115841599, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 577.6864013671875, "completions/mean_terminated_length": 537.976318359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 8.886555847185768, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 553191160.0, "reward": 0.5212053656578064, "reward_std": 0.19238333404064178, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 951 }, { "clip_ratio/high_max": 0.001612585181646864, "clip_ratio/high_mean": 0.0005515304546861444, "clip_ratio/low_mean": 0.00037551792172507703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009270483924410655, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 585.6529541015625, "completions/mean_terminated_length": 565.9539794921875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 8.89588801399825, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 553785473.0, "reward": 0.5558035969734192, "reward_std": 0.24123060703277588, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 952 }, { "clip_ratio/high_max": 0.0016049309633672237, "clip_ratio/high_mean": 0.000490850989763203, "clip_ratio/low_mean": 0.0003469750622571155, "clip_ratio/low_min": 1.0735142495832406e-05, "clip_ratio/region_mean": 0.000837826057249913, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 571.872802734375, "completions/mean_terminated_length": 540.1239013671875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 8.905220180810732, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 554356279.0, "reward": 0.5636160969734192, "reward_std": 0.21624736487865448, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 953 }, { "clip_ratio/high_max": 0.001735047599140671, "clip_ratio/high_mean": 0.0005252989367363625, "clip_ratio/low_mean": 0.0003084070613112999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00083370600532362, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 573.0792846679688, "completions/mean_terminated_length": 553.309814453125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 8.914552347623214, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 554940510.0, "reward": 0.5814732313156128, "reward_std": 0.20305635035037994, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 954 }, { "clip_ratio/high_max": 0.0015454856838914566, "clip_ratio/high_mean": 0.0005216419679072715, "clip_ratio/low_mean": 0.0003173057908725241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008389477634409559, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 617.6864013671875, "completions/mean_terminated_length": 574.453125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 8.923884514435695, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0184, "num_tokens": 555532621.0, "reward": 0.5502232313156128, "reward_std": 0.21462947130203247, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 955 }, { "clip_ratio/high_max": 0.001508560942056647, "clip_ratio/high_mean": 0.00044567480642854207, "clip_ratio/low_mean": 0.000273632164976334, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007193069659479079, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 634.8984375, "completions/mean_terminated_length": 575.9693603515625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 8.933216681248178, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 556130282.0, "reward": 0.5345982313156128, "reward_std": 0.19155851006507874, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 956 }, { "clip_ratio/high_max": 0.0013757557690041722, "clip_ratio/high_mean": 0.00043258950836388976, "clip_ratio/low_mean": 0.00033068390450807783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007632734059370705, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 643.578125, "completions/mean_terminated_length": 584.796875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 8.942548848060659, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 556737168.0, "reward": 0.5033482313156128, "reward_std": 0.2127547711133957, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 957 }, { "clip_ratio/high_max": 0.0016217505508393515, "clip_ratio/high_mean": 0.000501149289902969, "clip_ratio/low_mean": 0.00035087403148281737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008520233359377016, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 660.2511596679688, "completions/mean_terminated_length": 597.782958984375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 8.951881014873141, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 557358465.0, "reward": 0.5133928656578064, "reward_std": 0.20249292254447937, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 958 }, { "clip_ratio/high_max": 0.001791343160221004, "clip_ratio/high_mean": 0.0005761517129485583, "clip_ratio/low_mean": 0.0003766217503198277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009527734773655538, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 620.9453125, "completions/mean_terminated_length": 553.7371826171875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 8.961213181685622, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 557936408.0, "reward": 0.5870535969734192, "reward_std": 0.23052439093589783, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 959 }, { "clip_ratio/high_max": 0.0017940017096407246, "clip_ratio/high_mean": 0.0005707343857466185, "clip_ratio/low_mean": 0.00037262102762269933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000943355388699274, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3319.0, "completions/mean_length": 611.4710083007812, "completions/mean_terminated_length": 564.169677734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.970545348498105, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 558531854.0, "reward": 0.5546875, "reward_std": 0.21853618323802948, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 960 }, { "clip_ratio/high_max": 0.0013003966305404902, "clip_ratio/high_mean": 0.00037684658400394255, "clip_ratio/low_mean": 0.0002891404033107392, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006659869865188739, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3764.0, "completions/mean_length": 584.8381958007812, "completions/mean_terminated_length": 545.2088012695312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 8.979877515310585, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 559103469.0, "reward": 0.6194196939468384, "reward_std": 0.18588444590568542, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 961 }, { "clip_ratio/high_max": 0.0017467885804762773, "clip_ratio/high_mean": 0.0005397544890684003, "clip_ratio/low_mean": 0.00033204105056938715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008717955324755167, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 635.724365234375, "completions/mean_terminated_length": 560.7582397460938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.989209682123068, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 559692526.0, "reward": 0.5055803656578064, "reward_std": 0.2199697643518448, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 962 }, { "clip_ratio/high_max": 0.0016496466269018129, "clip_ratio/high_mean": 0.00047832449854467995, "clip_ratio/low_mean": 0.00039233260883975163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008706570988579188, "completions/clipped_ratio": 0.014204545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 637.0966186523438, "completions/mean_terminated_length": 587.2564697265625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 8.998541848935549, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 560307979.0, "reward": 0.4854910969734192, "reward_std": 0.21729834377765656, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 963 }, { "clip_ratio/high_max": 0.0015838597200854565, "clip_ratio/high_mean": 0.000544780214227103, "clip_ratio/low_mean": 0.00034177267389168264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008865528870956041, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 635.2299194335938, "completions/mean_terminated_length": 604.0518188476562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 9.009332166812483, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 560936585.0, "reward": 0.5167410969734192, "reward_std": 0.23157496750354767, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 964 }, { "clip_ratio/high_max": 0.0015604611680828384, "clip_ratio/high_mean": 0.00045380116466731124, "clip_ratio/low_mean": 0.0002617700657765454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007155712255553226, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 559.2467041015625, "completions/mean_terminated_length": 523.3607177734375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 9.018664333624963, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 561486790.0, "reward": 0.6149553656578064, "reward_std": 0.18280622363090515, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 965 }, { "clip_ratio/high_max": 0.0018859688234442729, "clip_ratio/high_mean": 0.0005737175087006108, "clip_ratio/low_mean": 0.0003881120976529928, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000961829596235475, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 606.96875, "completions/mean_terminated_length": 527.3104858398438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.027996500437446, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 562045386.0, "reward": 0.5502232313156128, "reward_std": 0.208427295088768, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 966 }, { "clip_ratio/high_max": 0.0016008473776309984, "clip_ratio/high_mean": 0.0004954031207944354, "clip_ratio/low_mean": 0.0003710629986244385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008664661168040766, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 650.2589721679688, "completions/mean_terminated_length": 571.5889892578125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.037328667249927, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 562636498.0, "reward": 0.5446428656578064, "reward_std": 0.239656463265419, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 967 }, { "clip_ratio/high_max": 0.001595270895450085, "clip_ratio/high_mean": 0.0004952483645865868, "clip_ratio/low_mean": 0.0003451965735621343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008404449472436681, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 602.5960083007812, "completions/mean_terminated_length": 567.14990234375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.04666083406241, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 563245408.0, "reward": 0.5033482313156128, "reward_std": 0.23548056185245514, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 968 }, { "clip_ratio/high_max": 0.001770732998920721, "clip_ratio/high_mean": 0.0006120005477896484, "clip_ratio/low_mean": 0.000363428881200889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009754294169397326, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 598.794677734375, "completions/mean_terminated_length": 559.3228149414062, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 9.05599300087489, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 563825608.0, "reward": 0.5502232313156128, "reward_std": 0.2438220977783203, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 969 }, { "clip_ratio/high_max": 0.0019437053215369815, "clip_ratio/high_mean": 0.0006528858348247013, "clip_ratio/low_mean": 0.0003233873865156056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009762732279341435, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 558.6975708007812, "completions/mean_terminated_length": 518.7731323242188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 9.065325167687373, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 564369769.0, "reward": 0.5803571939468384, "reward_std": 0.2322482466697693, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 970 }, { "clip_ratio/high_max": 0.0016274704685201868, "clip_ratio/high_mean": 0.0005316773524555174, "clip_ratio/low_mean": 0.000334410725372436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008660880839670426, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 652.6596069335938, "completions/mean_terminated_length": 594.032958984375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 9.074657334499854, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 564985520.0, "reward": 0.512276828289032, "reward_std": 0.22646865248680115, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 971 }, { "clip_ratio/high_max": 0.00183391165046487, "clip_ratio/high_mean": 0.0005753153495788865, "clip_ratio/low_mean": 0.0003095582123933127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008848735687934095, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 563.9375, "completions/mean_terminated_length": 540.1258544921875, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 9.083989501312336, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 565549152.0, "reward": 0.590401828289032, "reward_std": 0.23093554377555847, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 972 }, { "clip_ratio/high_max": 0.0016102681729535107, "clip_ratio/high_mean": 0.0005075933613625239, "clip_ratio/low_mean": 0.00030177611552062444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008093694632407278, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3285.0, "completions/mean_length": 599.513427734375, "completions/mean_terminated_length": 560.0496826171875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 9.093321668124817, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 566133380.0, "reward": 0.5736607313156128, "reward_std": 0.188932403922081, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 973 }, { "clip_ratio/high_max": 0.0014658632471764577, "clip_ratio/high_mean": 0.0004125052128074458, "clip_ratio/low_mean": 0.00030730440084880684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000719809624570189, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3616.0, "completions/mean_length": 664.513427734375, "completions/mean_terminated_length": 594.1640625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.1026538349373, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 566735168.0, "reward": 0.5089285969734192, "reward_std": 0.20343543589115143, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 974 }, { "clip_ratio/high_max": 0.0014222874833649257, "clip_ratio/high_mean": 0.0003877350225138798, "clip_ratio/low_mean": 0.000343247693081139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007309827069548192, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3809.0, "completions/mean_length": 635.6395263671875, "completions/mean_terminated_length": 568.715576171875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 9.11198600174978, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 567340581.0, "reward": 0.4799107313156128, "reward_std": 0.20688427984714508, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998752772808075, "step": 975 }, { "clip_ratio/high_max": 0.0015988921968528302, "clip_ratio/high_mean": 0.0004650440851037274, "clip_ratio/low_mean": 0.0003426979819778353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000807742075267015, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3360.0, "completions/mean_length": 606.1015625, "completions/mean_terminated_length": 538.6063842773438, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 9.121318168562263, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 567906192.0, "reward": 0.512276828289032, "reward_std": 0.22500188648700714, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 976 }, { "clip_ratio/high_max": 0.0014862458629067987, "clip_ratio/high_mean": 0.000497794334705759, "clip_ratio/low_mean": 0.00037054970061944914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008683440528329811, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 677.53125, "completions/mean_terminated_length": 607.4487915039062, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 9.130650335374744, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 568532484.0, "reward": 0.5022321939468384, "reward_std": 0.2364223450422287, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 977 }, { "clip_ratio/high_max": 0.001443858863240166, "clip_ratio/high_mean": 0.0003764739296912012, "clip_ratio/low_mean": 0.00032745573957981833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007039296669972828, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 644.1439819335938, "completions/mean_terminated_length": 565.33447265625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 9.139982502187227, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 569116485.0, "reward": 0.5479910969734192, "reward_std": 0.1867859810590744, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 978 }, { "clip_ratio/high_max": 0.0018931880340460339, "clip_ratio/high_mean": 0.0005805788216548535, "clip_ratio/low_mean": 0.0003178017252594145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008983805510069942, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3165.0, "completions/mean_length": 609.4699096679688, "completions/mean_terminated_length": 566.1344604492188, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 9.149314668999708, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0135, "num_tokens": 569712306.0, "reward": 0.5814732313156128, "reward_std": 0.2284930944442749, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 979 }, { "clip_ratio/high_max": 0.0016218523360294057, "clip_ratio/high_mean": 0.0005201477135869936, "clip_ratio/low_mean": 0.0003666365460048837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008867842439030937, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3315.0, "completions/mean_length": 687.2779541015625, "completions/mean_terminated_length": 581.3682250976562, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 9.15864683581219, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 570308715.0, "reward": 0.527901828289032, "reward_std": 0.2483748346567154, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 980 }, { "clip_ratio/high_max": 0.0017747420506566414, "clip_ratio/high_mean": 0.0005133899981046852, "clip_ratio/low_mean": 0.00028014496490413876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007935349694889737, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 616.3694458007812, "completions/mean_terminated_length": 553.1033935546875, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 9.167979002624673, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 570892870.0, "reward": 0.5345982313156128, "reward_std": 0.22300449013710022, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 981 }, { "clip_ratio/high_max": 0.0013452855027935584, "clip_ratio/high_mean": 0.00033923454259365826, "clip_ratio/low_mean": 0.00032456524900226214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006637997832967812, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 576.099365234375, "completions/mean_terminated_length": 540.3843994140625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 9.177311169437154, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 571451647.0, "reward": 0.5524553656578064, "reward_std": 0.1696561723947525, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 982 }, { "clip_ratio/high_max": 0.0014058054948691279, "clip_ratio/high_mean": 0.0004449033821174453, "clip_ratio/low_mean": 0.0002943426667343374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007392460504433984, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 581.9397583007812, "completions/mean_terminated_length": 542.2776489257812, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 9.186643336249636, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 572017457.0, "reward": 0.6082589626312256, "reward_std": 0.21616928279399872, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 983 }, { "clip_ratio/high_max": 0.0014298116630016011, "clip_ratio/high_mean": 0.0003933404473173141, "clip_ratio/low_mean": 0.0003025793921551667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006959198444747017, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 647.0424194335938, "completions/mean_terminated_length": 576.3348999023438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 9.195975503062117, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 572613231.0, "reward": 0.53125, "reward_std": 0.20080114901065826, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 984 }, { "clip_ratio/high_max": 0.0016818567855807487, "clip_ratio/high_mean": 0.0005095037383853196, "clip_ratio/low_mean": 0.0002962551257041923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008057588711380959, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 558.9553833007812, "completions/mean_terminated_length": 527.090087890625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 9.2053076698746, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 573152959.0, "reward": 0.6305803656578064, "reward_std": 0.1949855387210846, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 985 }, { "clip_ratio/high_max": 0.0018361453376201098, "clip_ratio/high_mean": 0.0005749121432927495, "clip_ratio/low_mean": 0.00031705505421086855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008919671927287709, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 576.7879638671875, "completions/mean_terminated_length": 533.0463256835938, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 9.21463983668708, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 573710289.0, "reward": 0.6328125, "reward_std": 0.2312796264886856, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 986 }, { "clip_ratio/high_max": 0.001607736267033033, "clip_ratio/high_mean": 0.0004533880940016388, "clip_ratio/low_mean": 0.00042847167742365855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008818597752906498, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 659.5770263671875, "completions/mean_terminated_length": 597.0965576171875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 9.223972003499563, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 574335190.0, "reward": 0.5078125, "reward_std": 0.20651407539844513, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 987 }, { "clip_ratio/high_max": 0.0017732976848492399, "clip_ratio/high_mean": 0.0005352601366439558, "clip_ratio/low_mean": 0.0003872381439578021, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009224982841260498, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3208.0, "completions/mean_length": 671.2667846679688, "completions/mean_terminated_length": 620.845947265625, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 9.233304170312044, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 574983277.0, "reward": 0.4988839626312256, "reward_std": 0.23293182253837585, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 988 }, { "clip_ratio/high_max": 0.0017576297595951473, "clip_ratio/high_mean": 0.0005030810800690233, "clip_ratio/low_mean": 0.0003936687865007116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008967498679339769, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3161.0, "completions/mean_length": 573.9631958007812, "completions/mean_terminated_length": 538.2265625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 9.242636337124527, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0188, "num_tokens": 575544316.0, "reward": 0.5848214626312256, "reward_std": 0.23266012966632843, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 989 }, { "clip_ratio/high_max": 0.0013710622233702452, "clip_ratio/high_mean": 0.0003740097836271161, "clip_ratio/low_mean": 0.00029554720356372854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006695569945804891, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3655.0, "completions/mean_length": 615.6730346679688, "completions/mean_terminated_length": 548.3629150390625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.251968503937007, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 576123711.0, "reward": 0.5223214626312256, "reward_std": 0.20008648931980133, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 990 }, { "clip_ratio/high_max": 0.0016154453733179253, "clip_ratio/high_mean": 0.00043451850569908856, "clip_ratio/low_mean": 0.0003143019927165369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007488204887522443, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 565.6875, "completions/mean_terminated_length": 541.8876342773438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.26130067074949, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 576708119.0, "reward": 0.5066964626312256, "reward_std": 0.16522064805030823, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 991 }, { "clip_ratio/high_max": 0.0015530745004070923, "clip_ratio/high_mean": 0.00041285441466243356, "clip_ratio/low_mean": 0.00035634881396617857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007692032140766969, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3090.0, "completions/mean_length": 637.7210083007812, "completions/mean_terminated_length": 566.8223266601562, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 9.27063283756197, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 577292125.0, "reward": 0.5111607313156128, "reward_std": 0.19692625105381012, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 992 }, { "clip_ratio/high_max": 0.0015815202550584218, "clip_ratio/high_mean": 0.0004305243475073439, "clip_ratio/low_mean": 0.0004106321025574289, "clip_ratio/low_min": 1.2739502381009515e-05, "clip_ratio/region_mean": 0.0008411564531343174, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3370.0, "completions/mean_length": 620.7913208007812, "completions/mean_terminated_length": 585.5298461914062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 9.279965004374453, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 577910698.0, "reward": 0.53125, "reward_std": 0.21781939268112183, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 993 }, { "clip_ratio/high_max": 0.0019933510266127996, "clip_ratio/high_mean": 0.0006385835677065188, "clip_ratio/low_mean": 0.00035117202401124814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009897555983116035, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3586.0, "completions/mean_length": 632.802490234375, "completions/mean_terminated_length": 581.8153686523438, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 9.289297171186934, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 578507617.0, "reward": 0.5569196939468384, "reward_std": 0.24390017986297607, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 994 }, { "clip_ratio/high_max": 0.0013096993534418289, "clip_ratio/high_mean": 0.00037204886712061125, "clip_ratio/low_mean": 0.00029453307570292964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006665819496447511, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3408.0, "completions/mean_length": 613.1138916015625, "completions/mean_terminated_length": 545.7542724609375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 9.298629337999417, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 579079879.0, "reward": 0.5491071939468384, "reward_std": 0.18967919051647186, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 995 }, { "clip_ratio/high_max": 0.001640536576815066, "clip_ratio/high_mean": 0.0004984663798950351, "clip_ratio/low_mean": 0.00029986659455971676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007983329805938411, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 614.7377319335938, "completions/mean_terminated_length": 555.4653930664062, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 9.307961504811898, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 579661684.0, "reward": 0.5792410969734192, "reward_std": 0.22255055606365204, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 996 }, { "clip_ratio/high_max": 0.0014478760458587203, "clip_ratio/high_mean": 0.00043692247299986775, "clip_ratio/low_mean": 0.000346227021964296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007831494958736585, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3933.0, "completions/mean_length": 578.7723388671875, "completions/mean_terminated_length": 531.0271606445312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 9.31729367162438, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 580218440.0, "reward": 0.5602678656578064, "reward_std": 0.20595203340053558, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 997 }, { "clip_ratio/high_max": 0.001665726963437919, "clip_ratio/high_mean": 0.0005046477675705319, "clip_ratio/low_mean": 0.00042466105219318706, "clip_ratio/low_min": 1.0045001545222476e-05, "clip_ratio/region_mean": 0.0009293088205595268, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 600.796875, "completions/mean_terminated_length": 549.338623046875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 9.326625838436861, "grad_norm": 0.15625, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 580781218.0, "reward": 0.5580357313156128, "reward_std": 0.2465658336877823, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 998 }, { "clip_ratio/high_max": 0.0017876784786494682, "clip_ratio/high_mean": 0.0004964058711038888, "clip_ratio/low_mean": 0.00036549975561683823, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008619056288807769, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3417.0, "completions/mean_length": 556.0301513671875, "completions/mean_terminated_length": 512.030517578125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.335958005249344, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0202, "num_tokens": 581325741.0, "reward": 0.5725446939468384, "reward_std": 0.20261241495609283, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 999 }, { "clip_ratio/high_max": 0.0012760765566781629, "clip_ratio/high_mean": 0.00035061654477885895, "clip_ratio/low_mean": 0.0002110987237529116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005617152737613651, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3900.0, "completions/mean_length": 593.6975708007812, "completions/mean_terminated_length": 558.1611938476562, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.345290172061826, "grad_norm": 0.109375, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 581915550.0, "reward": 0.5881696939468384, "reward_std": 0.15991567075252533, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 1000 }, { "clip_ratio/high_max": 0.002071037813948351, "clip_ratio/high_mean": 0.0005819952150432073, "clip_ratio/low_mean": 0.0004212928470224142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010032880572907743, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 590.8973388671875, "completions/mean_terminated_length": 547.3311157226562, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 9.354622338874307, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 582493786.0, "reward": 0.5491071939468384, "reward_std": 0.23364469408988953, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1001 }, { "clip_ratio/high_max": 0.0016897258901735768, "clip_ratio/high_mean": 0.0005495733062161889, "clip_ratio/low_mean": 0.0002537046361794637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008032779414861579, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 569.5803833007812, "completions/mean_terminated_length": 525.7491455078125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 9.36395450568679, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0109, "num_tokens": 583041778.0, "reward": 0.5625, "reward_std": 0.19772180914878845, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1002 }, { "clip_ratio/high_max": 0.0016611479131825035, "clip_ratio/high_mean": 0.00045596009431392304, "clip_ratio/low_mean": 0.0002882320144408368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007441921043209732, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 603.8192138671875, "completions/mean_terminated_length": 556.4140625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 9.37328667249927, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 583621336.0, "reward": 0.5290178656578064, "reward_std": 0.18088369071483612, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 1003 }, { "clip_ratio/high_max": 0.0015815586339158472, "clip_ratio/high_mean": 0.0005218012724981236, "clip_ratio/low_mean": 0.000298866166190237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008206674433495209, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2059.0, "completions/mean_length": 621.4888916015625, "completions/mean_terminated_length": 550.2574462890625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 9.382618839311753, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 584195918.0, "reward": 0.535714328289032, "reward_std": 0.21091002225875854, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1004 }, { "clip_ratio/high_max": 0.0017101928788179066, "clip_ratio/high_mean": 0.000560220372335607, "clip_ratio/low_mean": 0.00034818381993773073, "clip_ratio/low_min": 1.051834442478139e-05, "clip_ratio/region_mean": 0.0009084041876121773, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 612.5167846679688, "completions/mean_terminated_length": 581.134033203125, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 9.391951006124234, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 584796997.0, "reward": 0.5602678656578064, "reward_std": 0.2270306795835495, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1005 }, { "clip_ratio/high_max": 0.0014503122120004264, "clip_ratio/high_mean": 0.0004433378998101034, "clip_ratio/low_mean": 0.00033345918097893446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007767970964778215, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 665.9788208007812, "completions/mean_terminated_length": 587.6677856445312, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 9.401283172936717, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0133, "num_tokens": 585400402.0, "reward": 0.5089285969734192, "reward_std": 0.19497595727443695, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 1006 }, { "clip_ratio/high_max": 0.0016050372869358398, "clip_ratio/high_mean": 0.0005562499048892278, "clip_ratio/low_mean": 0.00028444551423945086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008406954211750417, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 615.7366333007812, "completions/mean_terminated_length": 548.427734375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 9.410615339749198, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 585970918.0, "reward": 0.59375, "reward_std": 0.20252613723278046, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1007 }, { "clip_ratio/high_max": 0.0015311507650039857, "clip_ratio/high_mean": 0.00044015199955538264, "clip_ratio/low_mean": 0.0003749093129954417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008150613184625399, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 618.2254638671875, "completions/mean_terminated_length": 571.015869140625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 9.41994750656168, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 586564280.0, "reward": 0.5212053656578064, "reward_std": 0.22582675516605377, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1008 }, { "clip_ratio/high_max": 0.0012292135716052144, "clip_ratio/high_mean": 0.0003560837087661639, "clip_ratio/low_mean": 0.0003563880686670018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007124717767510447, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 617.5100708007812, "completions/mean_terminated_length": 574.2745971679688, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 9.429279673374161, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 587160345.0, "reward": 0.535714328289032, "reward_std": 0.19910797476768494, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1009 }, { "clip_ratio/high_max": 0.001768231273672427, "clip_ratio/high_mean": 0.0004981605184184446, "clip_ratio/low_mean": 0.0003863488836941542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008845094143907772, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 619.786865234375, "completions/mean_terminated_length": 548.5205078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.438611840186644, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 587739226.0, "reward": 0.5334821939468384, "reward_std": 0.21112899482250214, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1010 }, { "clip_ratio/high_max": 0.0019684721610246925, "clip_ratio/high_mean": 0.0006615833231080614, "clip_ratio/low_mean": 0.0003499977823366862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010115811091964133, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 601.5714721679688, "completions/mean_terminated_length": 554.1357421875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 9.447944006999125, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 588319570.0, "reward": 0.6049107313156128, "reward_std": 0.2502896189689636, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1011 }, { "clip_ratio/high_max": 0.0015889170699665556, "clip_ratio/high_mean": 0.0004813561185983417, "clip_ratio/low_mean": 0.00032364998958200886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000805006105110806, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2658.0, "completions/mean_length": 583.544677734375, "completions/mean_terminated_length": 523.7412109375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 9.457276173811607, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 588870722.0, "reward": 0.590401828289032, "reward_std": 0.2043694704771042, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1012 }, { "clip_ratio/high_max": 0.0014783477381570265, "clip_ratio/high_mean": 0.0003990056422935595, "clip_ratio/low_mean": 0.0003467415241402705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007457471647285274, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3370.0, "completions/mean_length": 590.3527221679688, "completions/mean_terminated_length": 546.7796630859375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 9.466608340624088, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 589461462.0, "reward": 0.494419664144516, "reward_std": 0.19952164590358734, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1013 }, { "clip_ratio/high_max": 0.0018437276394251967, "clip_ratio/high_mean": 0.0005355752382456558, "clip_ratio/low_mean": 0.00026403180913803226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000799607054432272, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3397.0, "completions/mean_length": 659.4989013671875, "completions/mean_terminated_length": 620.7122192382812, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 9.47594050743657, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 590094893.0, "reward": 0.5368303656578064, "reward_std": 0.2111382633447647, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1014 }, { "clip_ratio/high_max": 0.001442779456738208, "clip_ratio/high_mean": 0.000435021896237231, "clip_ratio/low_mean": 0.00024378611402653405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006788080145270214, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 697.185302734375, "completions/mean_terminated_length": 615.6137084960938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 9.485272674249051, "grad_norm": 0.1123046875, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 590723643.0, "reward": 0.4441964626312256, "reward_std": 0.19358617067337036, "rewards/verify_math_reward/mean": 0.4441964328289032, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1015 }, { "clip_ratio/high_max": 0.0017190755297633586, "clip_ratio/high_mean": 0.0005332826183348516, "clip_ratio/low_mean": 0.00030512571436247526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008384083257624297, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 586.8616333007812, "completions/mean_terminated_length": 527.1146850585938, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 9.494604841061534, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 591268103.0, "reward": 0.5870535969734192, "reward_std": 0.21298159658908844, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1016 }, { "clip_ratio/high_max": 0.0015440364350070013, "clip_ratio/high_mean": 0.0004342544613109567, "clip_ratio/low_mean": 0.0004449193831987941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008791738518993952, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2152.0, "completions/mean_length": 621.7154541015625, "completions/mean_terminated_length": 554.5221557617188, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 9.503937007874015, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 591837840.0, "reward": 0.5848214626312256, "reward_std": 0.2205638438463211, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1017 }, { "clip_ratio/high_max": 0.0016775978756413679, "clip_ratio/high_mean": 0.0005225780655564449, "clip_ratio/low_mean": 0.0004171479017713864, "clip_ratio/low_min": 1.0109996765095275e-05, "clip_ratio/region_mean": 0.0009397259718753048, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 612.9486694335938, "completions/mean_terminated_length": 569.656494140625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 9.513269174686497, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 592435378.0, "reward": 0.5524553656578064, "reward_std": 0.2252180278301239, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1018 }, { "clip_ratio/high_max": 0.0016598425918346038, "clip_ratio/high_mean": 0.0005649558779623476, "clip_ratio/low_mean": 0.0004223565244956262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009873123999568634, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 599.8627319335938, "completions/mean_terminated_length": 564.388916015625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 9.52260134149898, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 593017191.0, "reward": 0.6037946939468384, "reward_std": 0.24960379302501678, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1019 }, { "clip_ratio/high_max": 0.0014643544318460044, "clip_ratio/high_mean": 0.0003963733986438456, "clip_ratio/low_mean": 0.0004394810962367046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00083585449920065, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2703.0, "completions/mean_length": 644.1920166015625, "completions/mean_terminated_length": 573.4259643554688, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 9.531933508311461, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 593605507.0, "reward": 0.5033482313156128, "reward_std": 0.21613861620426178, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1020 }, { "clip_ratio/high_max": 0.0015964167314450606, "clip_ratio/high_mean": 0.00046870355231476424, "clip_ratio/low_mean": 0.0003091682157219111, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007778717708788463, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3500.0, "completions/mean_length": 618.8861694335938, "completions/mean_terminated_length": 551.63818359375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.541265675123944, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 594178189.0, "reward": 0.5848214626312256, "reward_std": 0.19561536610126495, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1021 }, { "clip_ratio/high_max": 0.001627592717341031, "clip_ratio/high_mean": 0.00047343376854769303, "clip_ratio/low_mean": 0.00032549734771691874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007989311216078931, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2881.0, "completions/mean_length": 618.0357666015625, "completions/mean_terminated_length": 558.819580078125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 9.550597841936424, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 594758661.0, "reward": 0.582589328289032, "reward_std": 0.20569244027137756, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 1022 }, { "clip_ratio/high_max": 0.001609678109161905, "clip_ratio/high_mean": 0.00046538331412193656, "clip_ratio/low_mean": 0.000392561342209774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000857944663039234, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 681.044677734375, "completions/mean_terminated_length": 611.0341796875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 9.559930008748907, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 595396293.0, "reward": 0.470982164144516, "reward_std": 0.23150008916854858, "rewards/verify_math_reward/mean": 0.4709821343421936, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1023 }, { "clip_ratio/high_max": 0.0017634148316574283, "clip_ratio/high_mean": 0.0005043214844135946, "clip_ratio/low_mean": 0.0004319453153129871, "clip_ratio/low_min": 2.605532336019678e-05, "clip_ratio/region_mean": 0.0009362667951791082, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3143.0, "completions/mean_length": 593.3660888671875, "completions/mean_terminated_length": 549.8305053710938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.569262175561388, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 595982261.0, "reward": 0.5691964626312256, "reward_std": 0.22330942749977112, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1024 }, { "clip_ratio/high_max": 0.0016593620530329645, "clip_ratio/high_mean": 0.0005050667223258642, "clip_ratio/low_mean": 0.00026910394012702454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007741706713204621, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 656.372802734375, "completions/mean_terminated_length": 585.8565063476562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.57859434237387, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 596579747.0, "reward": 0.582589328289032, "reward_std": 0.2100098878145218, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 1025 }, { "clip_ratio/high_max": 0.001635274834370648, "clip_ratio/high_mean": 0.00047569862294949417, "clip_ratio/low_mean": 0.00030445646814314387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007801551028023823, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 583.4810791015625, "completions/mean_terminated_length": 515.54833984375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 9.587926509186351, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 597130194.0, "reward": 0.6026785969734192, "reward_std": 0.1856241375207901, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1026 }, { "clip_ratio/high_max": 0.001661720155425428, "clip_ratio/high_mean": 0.0004896392042610387, "clip_ratio/low_mean": 0.0002725053276435574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007621445315635356, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2047.0, "completions/mean_length": 605.661865234375, "completions/mean_terminated_length": 542.2011108398438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 9.597258675998834, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 597693315.0, "reward": 0.6116071939468384, "reward_std": 0.17870448529720306, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1027 }, { "clip_ratio/high_max": 0.0012194090950288228, "clip_ratio/high_mean": 0.0004011477055883006, "clip_ratio/low_mean": 0.00036200342162828747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007631511352883535, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 650.7042846679688, "completions/mean_terminated_length": 611.8182983398438, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 9.606590842811315, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 598329818.0, "reward": 0.5055803656578064, "reward_std": 0.17930999398231506, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1028 }, { "clip_ratio/high_max": 0.0018090404009853955, "clip_ratio/high_mean": 0.0005827510431117844, "clip_ratio/low_mean": 0.00031681506106906454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008995661037261016, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3716.0, "completions/mean_length": 585.2578125, "completions/mean_terminated_length": 529.53173828125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 9.615923009623797, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 598891633.0, "reward": 0.5558035969734192, "reward_std": 0.21447792649269104, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1029 }, { "clip_ratio/high_max": 0.0016982593861030182, "clip_ratio/high_mean": 0.0005398427610998624, "clip_ratio/low_mean": 0.0002807480625506287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000820590821604128, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 582.9006958007812, "completions/mean_terminated_length": 523.0863037109375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.625255176436278, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 599437824.0, "reward": 0.6238839626312256, "reward_std": 0.194227397441864, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 1030 }, { "clip_ratio/high_max": 0.0013905311316193547, "clip_ratio/high_mean": 0.00043191946792831004, "clip_ratio/low_mean": 0.000325700002576923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007576194821012905, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 630.625, "completions/mean_terminated_length": 563.6040649414062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 9.63458734324876, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 600022896.0, "reward": 0.5267857313156128, "reward_std": 0.18599504232406616, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1031 }, { "clip_ratio/high_max": 0.001653968316531973, "clip_ratio/high_mean": 0.0005212303149164654, "clip_ratio/low_mean": 0.0003065186374442419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008277489523607073, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 607.5636596679688, "completions/mean_terminated_length": 576.1362915039062, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 9.643919510061242, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 600628745.0, "reward": 0.5524553656578064, "reward_std": 0.21098490059375763, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751850962638855, "step": 1032 }, { "clip_ratio/high_max": 0.0017462572832300793, "clip_ratio/high_mean": 0.0005433238966361387, "clip_ratio/low_mean": 0.0003435649302900856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008868888226061244, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 576.5881958007812, "completions/mean_terminated_length": 540.8782348632812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 9.653251676873724, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 601192448.0, "reward": 0.6071428656578064, "reward_std": 0.19945567846298218, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1033 }, { "clip_ratio/high_max": 0.002208570349466754, "clip_ratio/high_mean": 0.0006456002129198168, "clip_ratio/low_mean": 0.00037567671222404897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001021276943902194, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 627.2824096679688, "completions/mean_terminated_length": 576.2140502929688, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 9.662583843686207, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 601790869.0, "reward": 0.578125, "reward_std": 0.237140953540802, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1034 }, { "clip_ratio/high_max": 0.0013230712147560553, "clip_ratio/high_mean": 0.0003571682145775412, "clip_ratio/low_mean": 0.0003271263526585244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006842945576863713, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 555.6529541015625, "completions/mean_terminated_length": 535.78564453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.671916010498688, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 602353366.0, "reward": 0.582589328289032, "reward_std": 0.18392983078956604, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 1035 }, { "clip_ratio/high_max": 0.0016704393565305509, "clip_ratio/high_mean": 0.0005685832229573862, "clip_ratio/low_mean": 0.00031748412220622413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008860673483468418, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 581.5011596679688, "completions/mean_terminated_length": 545.8410034179688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.68124817731117, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 602926519.0, "reward": 0.5837053656578064, "reward_std": 0.22740861773490906, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1036 }, { "clip_ratio/high_max": 0.002119047327141743, "clip_ratio/high_mean": 0.0006555687136824417, "clip_ratio/low_mean": 0.0003680266057699555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010235953250230523, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3265.0, "completions/mean_length": 632.6920166015625, "completions/mean_terminated_length": 569.7227172851562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.690580344123651, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 603507147.0, "reward": 0.5758928656578064, "reward_std": 0.2413061559200287, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1037 }, { "clip_ratio/high_max": 0.0015943269299896201, "clip_ratio/high_mean": 0.00052203596135314, "clip_ratio/low_mean": 0.000388023648156377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009100595943891676, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1912.0, "completions/mean_length": 632.4609375, "completions/mean_terminated_length": 561.4544677734375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 9.699912510936134, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 604083768.0, "reward": 0.543526828289032, "reward_std": 0.23830027878284454, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1038 }, { "clip_ratio/high_max": 0.0015890000067884102, "clip_ratio/high_mean": 0.0004911664696010121, "clip_ratio/low_mean": 0.0002455752407968248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007367417165369261, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 605.0558471679688, "completions/mean_terminated_length": 545.61865234375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 9.709244677748615, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 604653770.0, "reward": 0.5613839626312256, "reward_std": 0.19681817293167114, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1039 }, { "clip_ratio/high_max": 0.0017141320331575116, "clip_ratio/high_mean": 0.0005748283646198615, "clip_ratio/low_mean": 0.0002959078447020147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008707362076165737, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 620.1663208007812, "completions/mean_terminated_length": 560.9863891601562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 9.718576844561097, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 605233159.0, "reward": 0.5424107313156128, "reward_std": 0.20621420443058014, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1040 }, { "clip_ratio/high_max": 0.0018559827494755154, "clip_ratio/high_mean": 0.0005467600085466984, "clip_ratio/low_mean": 0.00037965662920669274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00092641663923132, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 539.8772583007812, "completions/mean_terminated_length": 495.6768493652344, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 9.727909011373578, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 605762393.0, "reward": 0.5535714626312256, "reward_std": 0.22620722651481628, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 1041 }, { "clip_ratio/high_max": 0.0014845919586150558, "clip_ratio/high_mean": 0.0005118056769788382, "clip_ratio/low_mean": 0.0003898650722931052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009016707490445697, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 602.75, "completions/mean_terminated_length": 539.236328125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 9.73724117818606, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 606319305.0, "reward": 0.613839328289032, "reward_std": 0.22969591617584229, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1042 }, { "clip_ratio/high_max": 0.0014428102840611245, "clip_ratio/high_mean": 0.0004627575365248049, "clip_ratio/low_mean": 0.00044080607949581463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009035636321641505, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3593.0, "completions/mean_length": 592.021240234375, "completions/mean_terminated_length": 532.3621215820312, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 9.746573344998541, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 606875244.0, "reward": 0.5502232313156128, "reward_std": 0.20434018969535828, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1043 }, { "clip_ratio/high_max": 0.0017396485473000212, "clip_ratio/high_mean": 0.0005491690124017623, "clip_ratio/low_mean": 0.00039158352706181176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009407525503775105, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 671.0748291015625, "completions/mean_terminated_length": 572.7703857421875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 9.755905511811024, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 607459855.0, "reward": 0.566964328289032, "reward_std": 0.25156474113464355, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1044 }, { "clip_ratio/high_max": 0.0016653785951348254, "clip_ratio/high_mean": 0.0005270020892567118, "clip_ratio/low_mean": 0.0002509757468942553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007779778234180412, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 579.2567138671875, "completions/mean_terminated_length": 531.5181274414062, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 9.765237678623505, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 608004685.0, "reward": 0.6194196939468384, "reward_std": 0.18074241280555725, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1045 }, { "clip_ratio/high_max": 0.0014723056883667596, "clip_ratio/high_mean": 0.00041872708015944227, "clip_ratio/low_mean": 0.00039844727859872364, "clip_ratio/low_min": 1.3481449968821835e-05, "clip_ratio/region_mean": 0.0008171743629645789, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3227.0, "completions/mean_length": 684.450927734375, "completions/mean_terminated_length": 614.51025390625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.774569845435988, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 608638521.0, "reward": 0.5178571939468384, "reward_std": 0.20568355917930603, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1046 }, { "clip_ratio/high_max": 0.001494511433520529, "clip_ratio/high_mean": 0.00046225893277096475, "clip_ratio/low_mean": 0.0003379497046580582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000800208629698318, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 665.216552734375, "completions/mean_terminated_length": 594.881591796875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 9.783902012248468, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 609258235.0, "reward": 0.5245535969734192, "reward_std": 0.20234207808971405, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1047 }, { "clip_ratio/high_max": 0.0017511433034087531, "clip_ratio/high_mean": 0.00048649705445313884, "clip_ratio/low_mean": 0.0002581315509360138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000744628598113195, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4000.0, "completions/mean_length": 602.1629638671875, "completions/mean_terminated_length": 558.7367553710938, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 9.793234179060951, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 609846141.0, "reward": 0.5647321939468384, "reward_std": 0.19125469028949738, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606892466545105, "step": 1048 }, { "clip_ratio/high_max": 0.0016136951999214943, "clip_ratio/high_mean": 0.0004358586422767985, "clip_ratio/low_mean": 0.0003786776679817194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008145363053699839, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 631.857177734375, "completions/mean_terminated_length": 572.8762817382812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.802566345873432, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 610438685.0, "reward": 0.4888392984867096, "reward_std": 0.203317791223526, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1049 }, { "clip_ratio/high_max": 0.0016718643801141297, "clip_ratio/high_mean": 0.0005195867747715965, "clip_ratio/low_mean": 0.0003522919473653019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008718787289581087, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 589.9342041015625, "completions/mean_terminated_length": 542.3405151367188, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 9.811898512685914, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 611005954.0, "reward": 0.625, "reward_std": 0.21571576595306396, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1050 }, { "clip_ratio/high_max": 0.0016855701924214372, "clip_ratio/high_mean": 0.0005488282470196282, "clip_ratio/low_mean": 0.0003581058080044386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009069340544556326, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3349.0, "completions/mean_length": 633.0178833007812, "completions/mean_terminated_length": 557.9931640625, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 9.821230679498395, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0184, "num_tokens": 611575882.0, "reward": 0.5613839626312256, "reward_std": 0.2335277795791626, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1051 }, { "clip_ratio/high_max": 0.0014498521923087537, "clip_ratio/high_mean": 0.00042809664171272743, "clip_ratio/low_mean": 0.00034096496324309555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007690615902902209, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3234.0, "completions/mean_length": 664.0245971679688, "completions/mean_terminated_length": 609.5487670898438, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 9.830562846310878, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 612207752.0, "reward": 0.5011160969734192, "reward_std": 0.21752727031707764, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1052 }, { "clip_ratio/high_max": 0.0016368181459256448, "clip_ratio/high_mean": 0.0004629036884580273, "clip_ratio/low_mean": 0.00030241855756685254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007653222401131643, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 606.2042846679688, "completions/mean_terminated_length": 554.8255615234375, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 9.83989501312336, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 612780503.0, "reward": 0.5691964626312256, "reward_std": 0.20493286848068237, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 1053 }, { "clip_ratio/high_max": 0.0015261678454407956, "clip_ratio/high_mean": 0.00044068275667541457, "clip_ratio/low_mean": 0.00036278312018112047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008034658958422369, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 647.3783569335938, "completions/mean_terminated_length": 552.462158203125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 9.849227179935841, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 613352306.0, "reward": 0.4921875298023224, "reward_std": 0.21673452854156494, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1054 }, { "clip_ratio/high_max": 0.0015844178733459557, "clip_ratio/high_mean": 0.0004496640008255781, "clip_ratio/low_mean": 0.0002630391011280153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007127031035452092, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3781.0, "completions/mean_length": 610.0859375, "completions/mean_terminated_length": 550.7344360351562, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 9.858559346748324, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 613939655.0, "reward": 0.5535714626312256, "reward_std": 0.18193750083446503, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994493484497, "step": 1055 }, { "clip_ratio/high_max": 0.0013832466383973951, "clip_ratio/high_mean": 0.0003877047243463494, "clip_ratio/low_mean": 0.0002934871680508877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006811918874518597, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 685.935302734375, "completions/mean_terminated_length": 588.057373046875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.867891513560805, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0147, "num_tokens": 614546061.0, "reward": 0.5145089626312256, "reward_std": 0.1807435303926468, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 1056 }, { "clip_ratio/high_max": 0.0016222526155615924, "clip_ratio/high_mean": 0.0005341903965927486, "clip_ratio/low_mean": 0.0002644018669570869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007985922597981698, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 630.232177734375, "completions/mean_terminated_length": 583.185546875, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 9.877223680373287, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 615148701.0, "reward": 0.5412946939468384, "reward_std": 0.2181655615568161, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1057 }, { "clip_ratio/high_max": 0.001589798277564114, "clip_ratio/high_mean": 0.0005022087107136031, "clip_ratio/low_mean": 0.00036527433803712483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008674830378367915, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 629.8873291015625, "completions/mean_terminated_length": 570.8729248046875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 9.886555847185768, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 615745488.0, "reward": 0.5223214626312256, "reward_std": 0.22608567774295807, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1058 }, { "clip_ratio/high_max": 0.0015174525497059221, "clip_ratio/high_mean": 0.000393879813032072, "clip_ratio/low_mean": 0.00024093854619877675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006348183455884282, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 625.7902221679688, "completions/mean_terminated_length": 546.5616455078125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 9.89588801399825, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 616310788.0, "reward": 0.5178571939468384, "reward_std": 0.17911633849143982, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1059 }, { "clip_ratio/high_max": 0.0015828846571821487, "clip_ratio/high_mean": 0.0004305865841160994, "clip_ratio/low_mean": 0.00028942746791926766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007200140516943065, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 570.78125, "completions/mean_terminated_length": 522.9276123046875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 9.905220180810732, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 616859240.0, "reward": 0.5993303656578064, "reward_std": 0.17149020731449127, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1060 }, { "clip_ratio/high_max": 0.0017359167850372614, "clip_ratio/high_mean": 0.0005204638389386673, "clip_ratio/low_mean": 0.00037651833508789423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008969821874416084, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 503.8035888671875, "completions/mean_terminated_length": 487.6950988769531, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 9.914552347623214, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 617374888.0, "reward": 0.6428571939468384, "reward_std": 0.20891515910625458, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1061 }, { "clip_ratio/high_max": 0.001725457988868584, "clip_ratio/high_mean": 0.000477585521821311, "clip_ratio/low_mean": 0.00038720311158613185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008647886463677423, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3222.0, "completions/mean_length": 630.3125, "completions/mean_terminated_length": 567.2999877929688, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 9.923884514435695, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 617962968.0, "reward": 0.5658482313156128, "reward_std": 0.19922491908073425, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1062 }, { "clip_ratio/high_max": 0.0019836470783047844, "clip_ratio/high_mean": 0.0005347727078515163, "clip_ratio/low_mean": 0.0002900128150713499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008247855193985743, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3111.0, "completions/mean_length": 564.9408569335938, "completions/mean_terminated_length": 508.8923034667969, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 9.933216681248178, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 618507411.0, "reward": 0.543526828289032, "reward_std": 0.2126345932483673, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1063 }, { "clip_ratio/high_max": 0.0014982956236053724, "clip_ratio/high_mean": 0.0005473875917232363, "clip_ratio/low_mean": 0.00041467652863502735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009620641203582636, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 631.1049194335938, "completions/mean_terminated_length": 607.74609375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 9.942548848060659, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 619128145.0, "reward": 0.5680803656578064, "reward_std": 0.2296210527420044, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1064 }, { "clip_ratio/high_max": 0.0017813220674725017, "clip_ratio/high_mean": 0.0005317763999528324, "clip_ratio/low_mean": 0.0003509698141215267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008827462388580898, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 655.536865234375, "completions/mean_terminated_length": 581.0, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.951881014873141, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 619729586.0, "reward": 0.5703125, "reward_std": 0.2108672559261322, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1065 }, { "clip_ratio/high_max": 0.001583902850143204, "clip_ratio/high_mean": 0.0004801422760465357, "clip_ratio/low_mean": 0.0002936088548040061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007737511232335237, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 589.4564819335938, "completions/mean_terminated_length": 549.8792724609375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 9.961213181685622, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 620307955.0, "reward": 0.5323660969734192, "reward_std": 0.2038070261478424, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1066 }, { "clip_ratio/high_max": 0.001739679495585733, "clip_ratio/high_mean": 0.0004959868065270712, "clip_ratio/low_mean": 0.0004510337030296796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009470204950048355, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 601.6998291015625, "completions/mean_terminated_length": 554.265869140625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 9.970545348498105, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 620890710.0, "reward": 0.5301339626312256, "reward_std": 0.24221837520599365, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1067 }, { "clip_ratio/high_max": 0.0015477416945941513, "clip_ratio/high_mean": 0.0004594975466716278, "clip_ratio/low_mean": 0.00027450891491298535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007340064667005208, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3739.0, "completions/mean_length": 631.6027221679688, "completions/mean_terminated_length": 556.5473022460938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.979877515310585, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 621457746.0, "reward": 0.6171875, "reward_std": 0.19564956426620483, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1068 }, { "clip_ratio/high_max": 0.0014885582504575723, "clip_ratio/high_mean": 0.0004587147298025229, "clip_ratio/low_mean": 0.00030915980028112244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007678745264456666, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 626.5413208007812, "completions/mean_terminated_length": 559.44140625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 9.989209682123068, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 622053583.0, "reward": 0.5189732313156128, "reward_std": 0.19389109313488007, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1069 }, { "clip_ratio/high_max": 0.001736123401315126, "clip_ratio/high_mean": 0.0005841668562425184, "clip_ratio/low_mean": 0.00036166243921798014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009458292834096937, "completions/clipped_ratio": 0.017045454545454586, "completions/max_length": 4096.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 641.0227661132812, "completions/mean_terminated_length": 581.1098022460938, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 9.998541848935549, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 622652831.0, "reward": 0.5412946939468384, "reward_std": 0.23412367701530457, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1070 }, { "clip_ratio/high_max": 0.001919582125992747, "clip_ratio/high_mean": 0.0005472912157529208, "clip_ratio/low_mean": 0.0003837442050098616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009310354244007613, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 613.8114013671875, "completions/mean_terminated_length": 554.5233154296875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.009332166812483, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 623233870.0, "reward": 0.5, "reward_std": 0.23165123164653778, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1071 }, { "clip_ratio/high_max": 0.001850129976446624, "clip_ratio/high_mean": 0.0005400942072810722, "clip_ratio/low_mean": 0.0002552113480760454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007953055492180283, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3605.0, "completions/mean_length": 619.8326416015625, "completions/mean_terminated_length": 580.5982055664062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.018664333624963, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 623841688.0, "reward": 0.520089328289032, "reward_std": 0.22718150913715363, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1072 }, { "clip_ratio/high_max": 0.001637823027522245, "clip_ratio/high_mean": 0.0004614952532620009, "clip_ratio/low_mean": 0.00027986235090793343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007413576065573579, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 624.794677734375, "completions/mean_terminated_length": 557.6609497070312, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 10.027996500437446, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 624422800.0, "reward": 0.5368303656578064, "reward_std": 0.20564965903759003, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1073 }, { "clip_ratio/high_max": 0.0015059621509863064, "clip_ratio/high_mean": 0.0004688872477345285, "clip_ratio/low_mean": 0.0004503274567468907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009192147099383874, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 656.4553833007812, "completions/mean_terminated_length": 561.7889404296875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 10.037328667249927, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 624998632.0, "reward": 0.5267857313156128, "reward_std": 0.21286143362522125, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 1074 }, { "clip_ratio/high_max": 0.0014891096298015327, "clip_ratio/high_mean": 0.0004943469086811092, "clip_ratio/low_mean": 0.0003300498495946158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008243967622547643, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2638.0, "completions/mean_length": 607.8046875, "completions/mean_terminated_length": 572.4114990234375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 10.04666083406241, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 625601793.0, "reward": 0.5446428656578064, "reward_std": 0.21951259672641754, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1075 }, { "clip_ratio/high_max": 0.0015768517932883697, "clip_ratio/high_mean": 0.0004668293256600009, "clip_ratio/low_mean": 0.00029906721465522423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007658965423615882, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 581.953125, "completions/mean_terminated_length": 550.2950439453125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.05599300087489, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 626167031.0, "reward": 0.6261160969734192, "reward_std": 0.20936980843544006, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1076 }, { "clip_ratio/high_max": 0.0015254894242389128, "clip_ratio/high_mean": 0.00043216289623160264, "clip_ratio/low_mean": 0.00032624031064187875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007584032155136811, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2434.0, "completions/mean_length": 606.6283569335938, "completions/mean_terminated_length": 563.2576293945312, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 10.065325167687373, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0124, "num_tokens": 626757858.0, "reward": 0.590401828289032, "reward_std": 0.17341090738773346, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1077 }, { "clip_ratio/high_max": 0.001506708684246405, "clip_ratio/high_mean": 0.0004400943637392629, "clip_ratio/low_mean": 0.00034780395139932807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007878983233240433, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3447.0, "completions/mean_length": 664.6272583007812, "completions/mean_terminated_length": 586.2853393554688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.074657334499854, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 627358924.0, "reward": 0.5345982313156128, "reward_std": 0.20534543693065643, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1078 }, { "clip_ratio/high_max": 0.0015208335544230067, "clip_ratio/high_mean": 0.0004815797437913716, "clip_ratio/low_mean": 0.00040735346692599705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008889332175385789, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 630.859375, "completions/mean_terminated_length": 579.8436889648438, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 10.083989501312336, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 627973710.0, "reward": 0.5245535969734192, "reward_std": 0.2173001766204834, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 1079 }, { "clip_ratio/high_max": 0.0017864221226773225, "clip_ratio/high_mean": 0.0006015160261085839, "clip_ratio/low_mean": 0.0003232312412819738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000924747267163184, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 633.765625, "completions/mean_terminated_length": 578.8095092773438, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 10.093321668124817, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 628586092.0, "reward": 0.5345982313156128, "reward_std": 0.2251092493534088, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1080 }, { "clip_ratio/high_max": 0.0017837556415543077, "clip_ratio/high_mean": 0.000528502018596555, "clip_ratio/low_mean": 0.00035645686398311227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008849588662087626, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3144.0, "completions/mean_length": 591.6239013671875, "completions/mean_terminated_length": 544.0531616210938, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 10.1026538349373, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 629159699.0, "reward": 0.5379464626312256, "reward_std": 0.21564048528671265, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1081 }, { "clip_ratio/high_max": 0.0015256862934620585, "clip_ratio/high_mean": 0.0004688509143306874, "clip_ratio/low_mean": 0.0002828350320669415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007516859441238921, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 597.6574096679688, "completions/mean_terminated_length": 558.1727294921875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 10.11198600174978, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 629749512.0, "reward": 0.543526828289032, "reward_std": 0.20354489982128143, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1082 }, { "clip_ratio/high_max": 0.0015897395605861675, "clip_ratio/high_mean": 0.00046778800992797187, "clip_ratio/low_mean": 0.00037000702150180587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008377950489375507, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3803.0, "completions/mean_length": 612.1116333007812, "completions/mean_terminated_length": 564.8190307617188, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 10.121318168562263, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 630348828.0, "reward": 0.5167410969734192, "reward_std": 0.2212817370891571, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1083 }, { "clip_ratio/high_max": 0.0015059544812174863, "clip_ratio/high_mean": 0.00044863606876788253, "clip_ratio/low_mean": 0.00035496303701165743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008035990940697957, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 627.505615234375, "completions/mean_terminated_length": 584.3943481445312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 10.130650335374744, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0205, "num_tokens": 630960457.0, "reward": 0.4743303656578064, "reward_std": 0.20865483582019806, "rewards/verify_math_reward/mean": 0.4743303656578064, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 1084 }, { "clip_ratio/high_max": 0.001831831161325681, "clip_ratio/high_mean": 0.0004756126108986791, "clip_ratio/low_mean": 0.0002906310619437136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007662436773898662, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3663.0, "completions/mean_length": 642.2957763671875, "completions/mean_terminated_length": 583.49267578125, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 10.139982502187227, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 631558162.0, "reward": 0.551339328289032, "reward_std": 0.1965171843767166, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1085 }, { "clip_ratio/high_max": 0.0019993139376310864, "clip_ratio/high_mean": 0.0006707254733555601, "clip_ratio/low_mean": 0.00036152725783722417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010322527396056103, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3234.0, "completions/mean_length": 549.7199096679688, "completions/mean_terminated_length": 521.7964477539062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.149314668999708, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 632105591.0, "reward": 0.5892857313156128, "reward_std": 0.2587401866912842, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1086 }, { "clip_ratio/high_max": 0.0015812696365173906, "clip_ratio/high_mean": 0.00045869233645134955, "clip_ratio/low_mean": 0.0003219428009515468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007806351241015363, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2300.0, "completions/mean_length": 580.7053833007812, "completions/mean_terminated_length": 532.9864501953125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 10.15864683581219, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 632671839.0, "reward": 0.578125, "reward_std": 0.1695060133934021, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1087 }, { "clip_ratio/high_max": 0.0015989276325854007, "clip_ratio/high_mean": 0.00047871218202999444, "clip_ratio/low_mean": 0.00033596853643302893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008146807094817632, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3427.0, "completions/mean_length": 660.989990234375, "completions/mean_terminated_length": 586.5712280273438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 10.167979002624673, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 633271286.0, "reward": 0.5334821939468384, "reward_std": 0.2019711583852768, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1088 }, { "clip_ratio/high_max": 0.001786749296115886, "clip_ratio/high_mean": 0.0005251097479685995, "clip_ratio/low_mean": 0.00035115719413170154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008762669303905568, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 701.9520263671875, "completions/mean_terminated_length": 624.4622802734375, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 10.177311169437154, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 633909875.0, "reward": 0.5167410969734192, "reward_std": 0.2281925082206726, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1089 }, { "clip_ratio/high_max": 0.001676805290117045, "clip_ratio/high_mean": 0.0004657145470901014, "clip_ratio/low_mean": 0.00036184746477374574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008275620139102102, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 570.3795166015625, "completions/mean_terminated_length": 522.5203857421875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 10.186643336249636, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0132, "num_tokens": 634457207.0, "reward": 0.629464328289032, "reward_std": 0.20693056285381317, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1090 }, { "clip_ratio/high_max": 0.0014925350005796645, "clip_ratio/high_mean": 0.00047520343218820926, "clip_ratio/low_mean": 0.0003693925566494727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008445960006611131, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 596.4241333007812, "completions/mean_terminated_length": 520.6066284179688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.195975503062117, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 634997931.0, "reward": 0.625, "reward_std": 0.22184264659881592, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1091 }, { "clip_ratio/high_max": 0.0015315620985347778, "clip_ratio/high_mean": 0.00043522461805878265, "clip_ratio/low_mean": 0.0003758994764666568, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008111240995276603, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 654.2835083007812, "completions/mean_terminated_length": 599.653076171875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.2053076698746, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 635618721.0, "reward": 0.4720982313156128, "reward_std": 0.20272116363048553, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1092 }, { "clip_ratio/high_max": 0.001533907387056388, "clip_ratio/high_mean": 0.0004846340226549728, "clip_ratio/low_mean": 0.0003948526127715013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008794866416792502, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 619.5390625, "completions/mean_terminated_length": 572.3472900390625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.21463983668708, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 636207868.0, "reward": 0.5424107313156128, "reward_std": 0.22097823023796082, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1093 }, { "clip_ratio/high_max": 0.0015355293689935934, "clip_ratio/high_mean": 0.00046355218569260614, "clip_ratio/low_mean": 0.00033559102257640916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007991432094058837, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3785.0, "completions/mean_length": 653.614990234375, "completions/mean_terminated_length": 575.0216674804688, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 10.223972003499563, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 636808171.0, "reward": 0.4453125298023224, "reward_std": 0.21485699713230133, "rewards/verify_math_reward/mean": 0.4453125, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1094 }, { "clip_ratio/high_max": 0.0013949023050372489, "clip_ratio/high_mean": 0.0004000454498509498, "clip_ratio/low_mean": 0.00029239217974463827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006924376311872038, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 616.4921875, "completions/mean_terminated_length": 557.249755859375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 10.233304170312044, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0144, "num_tokens": 637383084.0, "reward": 0.5301339626312256, "reward_std": 0.18734757602214813, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1095 }, { "clip_ratio/high_max": 0.0017311158208030974, "clip_ratio/high_mean": 0.0005165576781109849, "clip_ratio/low_mean": 0.00030333791505654517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008198955838452093, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3716.0, "completions/mean_length": 619.5379638671875, "completions/mean_terminated_length": 580.3002319335938, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 10.242636337124527, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 637987990.0, "reward": 0.5479910969734192, "reward_std": 0.20557299256324768, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1096 }, { "clip_ratio/high_max": 0.0017144478551927023, "clip_ratio/high_mean": 0.0004727702116724686, "clip_ratio/low_mean": 0.00032949393676062755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008022641375191597, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 608.5960083007812, "completions/mean_terminated_length": 545.1885986328125, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 10.251968503937007, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0197, "num_tokens": 638554236.0, "reward": 0.5703125, "reward_std": 0.20962940156459808, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1097 }, { "clip_ratio/high_max": 0.0018896704732469516, "clip_ratio/high_mean": 0.0005906688852519437, "clip_ratio/low_mean": 0.0003481938667846407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009388627513544634, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 643.8225708007812, "completions/mean_terminated_length": 569.0319213867188, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 10.26130067074949, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 639139445.0, "reward": 0.574776828289032, "reward_std": 0.2286478579044342, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1098 }, { "clip_ratio/high_max": 0.0019207091681892052, "clip_ratio/high_mean": 0.0006169301102545433, "clip_ratio/low_mean": 0.0003355052323286145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009524353226879612, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 599.1785888671875, "completions/mean_terminated_length": 547.6964721679688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 10.27063283756197, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 639716381.0, "reward": 0.5613839626312256, "reward_std": 0.21350152790546417, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1099 }, { "clip_ratio/high_max": 0.0016598080492258305, "clip_ratio/high_mean": 0.0004825434170925291, "clip_ratio/low_mean": 0.00027872542761997465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007612688468725537, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 620.1328125, "completions/mean_terminated_length": 548.8735961914062, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 10.279965004374453, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 640299748.0, "reward": 0.5301339626312256, "reward_std": 0.19238336384296417, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1100 }, { "clip_ratio/high_max": 0.0015342075803346233, "clip_ratio/high_mean": 0.0004805250027857255, "clip_ratio/low_mean": 0.00031075992865226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007912849309832382, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 600.6428833007812, "completions/mean_terminated_length": 549.1823120117188, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 10.289297171186934, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 640874612.0, "reward": 0.5290178656578064, "reward_std": 0.19208984076976776, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 1101 }, { "clip_ratio/high_max": 0.0016850085739861242, "clip_ratio/high_mean": 0.000539821442430366, "clip_ratio/low_mean": 0.0002855800453289703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008254014892372652, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 627.1217041015625, "completions/mean_terminated_length": 580.0328369140625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.298629337999417, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 641480009.0, "reward": 0.5189732313156128, "reward_std": 0.20583511888980865, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1102 }, { "clip_ratio/high_max": 0.00164590566419065, "clip_ratio/high_mean": 0.0004613075368524733, "clip_ratio/low_mean": 0.0003890542811859632, "clip_ratio/low_min": 8.540584531147033e-06, "clip_ratio/region_mean": 0.0008503618082613684, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 617.1473388671875, "completions/mean_terminated_length": 553.8954467773438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.307961504811898, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 642060453.0, "reward": 0.5424107313156128, "reward_std": 0.20872969925403595, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1103 }, { "clip_ratio/high_max": 0.0013244973933979054, "clip_ratio/high_mean": 0.0003600586894663138, "clip_ratio/low_mean": 0.00028554810154446386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006456067894760054, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 656.9788208007812, "completions/mean_terminated_length": 598.4256591796875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 10.31729367162438, "grad_norm": 0.10693359375, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 642672010.0, "reward": 0.5033482313156128, "reward_std": 0.18760831654071808, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1104 }, { "clip_ratio/high_max": 0.001573255103721749, "clip_ratio/high_mean": 0.0004818824679659883, "clip_ratio/low_mean": 0.00032442863539472455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008063110954026342, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3687.0, "completions/mean_length": 645.6529541015625, "completions/mean_terminated_length": 590.885498046875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 10.326625838436861, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0147, "num_tokens": 643277859.0, "reward": 0.5558035969734192, "reward_std": 0.22920215129852295, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1105 }, { "clip_ratio/high_max": 0.0015616901346220402, "clip_ratio/high_mean": 0.0004561532878142316, "clip_ratio/low_mean": 0.00028561434965013177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007417676533805206, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 655.6920166015625, "completions/mean_terminated_length": 577.1461181640625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 10.335958005249344, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0161, "num_tokens": 643868215.0, "reward": 0.5558035969734192, "reward_std": 0.2031298130750656, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1106 }, { "clip_ratio/high_max": 0.0016778797153165215, "clip_ratio/high_mean": 0.000509412726046321, "clip_ratio/low_mean": 0.0002634687833733551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007728815107839182, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1906.0, "completions/mean_length": 555.0067138671875, "completions/mean_terminated_length": 506.9389343261719, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 10.345290172061826, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0158, "num_tokens": 644402933.0, "reward": 0.5881696939468384, "reward_std": 0.19061599671840668, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 1107 }, { "clip_ratio/high_max": 0.0017703265029922477, "clip_ratio/high_mean": 0.0005150289084667747, "clip_ratio/low_mean": 0.00039229383162364684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009073227447515819, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 621.4542846679688, "completions/mean_terminated_length": 550.2221069335938, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 10.354622338874307, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 644969940.0, "reward": 0.535714328289032, "reward_std": 0.23446954786777496, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1108 }, { "clip_ratio/high_max": 0.0018161127554776613, "clip_ratio/high_mean": 0.0005660896570134355, "clip_ratio/low_mean": 0.0002896276710089296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008557173277949914, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3597.0, "completions/mean_length": 570.9642944335938, "completions/mean_terminated_length": 535.197265625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.36395450568679, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 645532052.0, "reward": 0.59375, "reward_std": 0.22988361120224, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1109 }, { "clip_ratio/high_max": 0.0016176943408936495, "clip_ratio/high_mean": 0.00042381206822028616, "clip_ratio/low_mean": 0.0002469557192625871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006707677830490866, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2356.0, "completions/mean_length": 620.8381958007812, "completions/mean_terminated_length": 557.6533813476562, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 10.37328667249927, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 646107883.0, "reward": 0.535714328289032, "reward_std": 0.18388886749744415, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1110 }, { "clip_ratio/high_max": 0.0016423155229858821, "clip_ratio/high_mean": 0.0004701535292497283, "clip_ratio/low_mean": 0.0003223942969725613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007925478212200687, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3673.0, "completions/mean_length": 629.552490234375, "completions/mean_terminated_length": 570.5323486328125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.382618839311753, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0128, "num_tokens": 646684274.0, "reward": 0.5870535969734192, "reward_std": 0.19877281785011292, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1111 }, { "clip_ratio/high_max": 0.002033679169471725, "clip_ratio/high_mean": 0.0007057901575535652, "clip_ratio/low_mean": 0.00032785739153951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001033647544318228, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 618.4654541015625, "completions/mean_terminated_length": 551.2092895507812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.391951006124234, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 647254307.0, "reward": 0.6361607313156128, "reward_std": 0.23158451914787292, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1112 }, { "clip_ratio/high_max": 0.0013961403988105303, "clip_ratio/high_mean": 0.0003436762935962179, "clip_ratio/low_mean": 0.00026574405274004675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000609420357250201, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 644.7701416015625, "completions/mean_terminated_length": 589.9886474609375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 10.401283172936717, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 647874429.0, "reward": 0.515625, "reward_std": 0.1490258425474167, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1113 }, { "clip_ratio/high_max": 0.001803381193894893, "clip_ratio/high_mean": 0.0005605701894637605, "clip_ratio/low_mean": 0.0003618182871605313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009223884499078849, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2038.0, "completions/mean_length": 557.0870971679688, "completions/mean_terminated_length": 533.229248046875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.410615339749198, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 648443371.0, "reward": 0.5848214626312256, "reward_std": 0.2209436297416687, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1114 }, { "clip_ratio/high_max": 0.0018055061482300516, "clip_ratio/high_mean": 0.0005517320200851827, "clip_ratio/low_mean": 0.00031505655988439685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008667885795148322, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 625.1395263671875, "completions/mean_terminated_length": 578.0238037109375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.41994750656168, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 649033640.0, "reward": 0.5926339626312256, "reward_std": 0.2077532857656479, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 1115 }, { "clip_ratio/high_max": 0.0013779688324575545, "clip_ratio/high_mean": 0.00037272801978360803, "clip_ratio/low_mean": 0.00032349185505609057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006962198685869225, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 606.9520263671875, "completions/mean_terminated_length": 551.5703125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 10.429279673374161, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 649612381.0, "reward": 0.5011160969734192, "reward_std": 0.19948844611644745, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1116 }, { "clip_ratio/high_max": 0.0016746436685934896, "clip_ratio/high_mean": 0.0005172538121769321, "clip_ratio/low_mean": 0.00039144150798620103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009086953277801513, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 567.3158569335938, "completions/mean_terminated_length": 543.5269775390625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 10.438611840186644, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 650176008.0, "reward": 0.5647321939468384, "reward_std": 0.2127426713705063, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1117 }, { "clip_ratio/high_max": 0.001711494096525712, "clip_ratio/high_mean": 0.0005152216519945796, "clip_ratio/low_mean": 0.0002970204674284105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008122421086227405, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2356.0, "completions/mean_length": 598.5167846679688, "completions/mean_terminated_length": 567.0078735351562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.447944006999125, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 650770783.0, "reward": 0.543526828289032, "reward_std": 0.2141755372285843, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1118 }, { "clip_ratio/high_max": 0.0015689976444264175, "clip_ratio/high_mean": 0.00046198784798434644, "clip_ratio/low_mean": 0.00041180884591085487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000873796710948227, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3650.0, "completions/mean_length": 651.0245971679688, "completions/mean_terminated_length": 576.3899536132812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 10.457276173811607, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 651369429.0, "reward": 0.5055803656578064, "reward_std": 0.21568326652050018, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1119 }, { "clip_ratio/high_max": 0.0017002915546981967, "clip_ratio/high_mean": 0.0005153451375008444, "clip_ratio/low_mean": 0.00025879465385969525, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007741397971585684, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1635.0, "completions/mean_length": 598.7511596679688, "completions/mean_terminated_length": 551.2771606445312, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 10.466608340624088, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 651948134.0, "reward": 0.6104910969734192, "reward_std": 0.1849854290485382, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1120 }, { "clip_ratio/high_max": 0.0015437259207828902, "clip_ratio/high_mean": 0.000483118055626619, "clip_ratio/low_mean": 0.00033506975671571126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008181878238247009, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 609.5569458007812, "completions/mean_terminated_length": 574.1814575195312, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.47594050743657, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 652548409.0, "reward": 0.551339328289032, "reward_std": 0.19268685579299927, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1121 }, { "clip_ratio/high_max": 0.001450056854991999, "clip_ratio/high_mean": 0.0005254861473531491, "clip_ratio/low_mean": 0.00033613819505262654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008616243471806229, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 593.6217041015625, "completions/mean_terminated_length": 529.9420166015625, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 10.485272674249051, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.014, "num_tokens": 653095390.0, "reward": 0.6272321939468384, "reward_std": 0.2109421044588089, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1122 }, { "clip_ratio/high_max": 0.0016598611282461206, "clip_ratio/high_mean": 0.0005630239687661742, "clip_ratio/low_mean": 0.0003776657865728339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000940689756134816, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 627.6171875, "completions/mean_terminated_length": 560.5380859375, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 10.494604841061534, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 653672199.0, "reward": 0.5658482313156128, "reward_std": 0.21845951676368713, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1123 }, { "clip_ratio/high_max": 0.0014626084221163183, "clip_ratio/high_mean": 0.0003792729844462883, "clip_ratio/low_mean": 0.00029118623911017494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006704592176447477, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 619.7824096679688, "completions/mean_terminated_length": 552.5517578125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 10.503937007874015, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 654243796.0, "reward": 0.5379464626312256, "reward_std": 0.17900507152080536, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1124 }, { "clip_ratio/high_max": 0.0016729121289245086, "clip_ratio/high_mean": 0.000553323600342992, "clip_ratio/low_mean": 0.0002776794560759299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008310030671054847, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 582.513427734375, "completions/mean_terminated_length": 542.8577880859375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 10.513269174686497, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 654804992.0, "reward": 0.590401828289032, "reward_std": 0.20272116363048553, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1125 }, { "clip_ratio/high_max": 0.0017559255848027533, "clip_ratio/high_mean": 0.0005251845698239777, "clip_ratio/low_mean": 0.00035391080314184364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000879095366144611, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 567.1171875, "completions/mean_terminated_length": 523.25537109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.52260134149898, "grad_norm": 0.1435546875, "learning_rate": 1e-06, "loss": 0.0166, "num_tokens": 655373857.0, "reward": 0.5636160969734192, "reward_std": 0.2204137146472931, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1126 }, { "clip_ratio/high_max": 0.0015505834790019435, "clip_ratio/high_mean": 0.0004221622829163607, "clip_ratio/low_mean": 0.0002862095540194787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00070837183511685, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 560.2756958007812, "completions/mean_terminated_length": 516.3287963867188, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 10.531933508311461, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 655922648.0, "reward": 0.5848214626312256, "reward_std": 0.18915992975234985, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1127 }, { "clip_ratio/high_max": 0.0017083090233427356, "clip_ratio/high_mean": 0.0005162691450095735, "clip_ratio/low_mean": 0.00035800668763386057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008742758409425733, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 585.3538208007812, "completions/mean_terminated_length": 513.381591796875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 10.541265675123944, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 656474885.0, "reward": 0.5647321939468384, "reward_std": 0.21237428486347198, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606892466545105, "step": 1128 }, { "clip_ratio/high_max": 0.0015053191436891211, "clip_ratio/high_mean": 0.0004785692044606549, "clip_ratio/low_mean": 0.0003047558732305333, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007833250847397721, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 570.0848388671875, "completions/mean_terminated_length": 526.2598876953125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 10.550597841936424, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 657029409.0, "reward": 0.546875, "reward_std": 0.19445420801639557, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1129 }, { "clip_ratio/high_max": 0.0017718157059789519, "clip_ratio/high_mean": 0.0005742348662352015, "clip_ratio/low_mean": 0.00030858243007969577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008828172940411605, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3093.0, "completions/mean_length": 593.9442138671875, "completions/mean_terminated_length": 570.3348388671875, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 10.559930008748907, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 657623919.0, "reward": 0.5970982313156128, "reward_std": 0.22950340807437897, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 1130 }, { "clip_ratio/high_max": 0.0017404933114448795, "clip_ratio/high_mean": 0.0005020621056246455, "clip_ratio/low_mean": 0.0002497227479807407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007517848521274573, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 604.34375, "completions/mean_terminated_length": 540.8590698242188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.569262175561388, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 658180339.0, "reward": 0.5613839626312256, "reward_std": 0.18994230031967163, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1131 }, { "clip_ratio/high_max": 0.0017064755766114104, "clip_ratio/high_mean": 0.0004532395195155914, "clip_ratio/low_mean": 0.0003917174688012892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008449569822914782, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 652.9967041015625, "completions/mean_terminated_length": 606.2590942382812, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 10.57859434237387, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0161, "num_tokens": 658810120.0, "reward": 0.5558035969734192, "reward_std": 0.21872234344482422, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1132 }, { "clip_ratio/high_max": 0.0016609421973043936, "clip_ratio/high_mean": 0.0004729513090069304, "clip_ratio/low_mean": 0.0002798828978711754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007528341966462904, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 619.4006958007812, "completions/mean_terminated_length": 572.20703125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 10.587926509186351, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 659414871.0, "reward": 0.574776828289032, "reward_std": 0.1935127079486847, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1133 }, { "clip_ratio/high_max": 0.0017192801878991304, "clip_ratio/high_mean": 0.0005340224756764655, "clip_ratio/low_mean": 0.0004510266913939631, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009850491751421941, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2074.0, "completions/mean_length": 628.8381958007812, "completions/mean_terminated_length": 557.7574462890625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.597258675998834, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0192, "num_tokens": 659994414.0, "reward": 0.5345982313156128, "reward_std": 0.23657643795013428, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1134 }, { "clip_ratio/high_max": 0.0017060625768863247, "clip_ratio/high_mean": 0.0004993932670913637, "clip_ratio/low_mean": 0.00043186435993902705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000931257621232362, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 550.6953125, "completions/mean_terminated_length": 530.8002319335938, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 10.606590842811315, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 660552565.0, "reward": 0.6261160969734192, "reward_std": 0.2282680869102478, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1135 }, { "clip_ratio/high_max": 0.0017179808710352518, "clip_ratio/high_mean": 0.0005433138130683801, "clip_ratio/low_mean": 0.0003254884570651484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008688022735441336, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 581.4598388671875, "completions/mean_terminated_length": 541.7923583984375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 10.615923009623797, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 661120321.0, "reward": 0.598214328289032, "reward_std": 0.20012575387954712, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1136 }, { "clip_ratio/high_max": 0.00196997782222752, "clip_ratio/high_mean": 0.0005800461913167965, "clip_ratio/low_mean": 0.0003350745846546488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009151207609647827, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 580.7098388671875, "completions/mean_terminated_length": 537.0169677734375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 10.625255176436278, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 661692381.0, "reward": 0.551339328289032, "reward_std": 0.21684511005878448, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1137 }, { "clip_ratio/high_max": 0.0016381486402679002, "clip_ratio/high_mean": 0.0005049274443535978, "clip_ratio/low_mean": 0.00031980725100311247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008247346995631233, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 602.8080444335938, "completions/mean_terminated_length": 547.3605346679688, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 10.63458734324876, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 662267089.0, "reward": 0.5848214626312256, "reward_std": 0.22112908959388733, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1138 }, { "clip_ratio/high_max": 0.001646062865802378, "clip_ratio/high_mean": 0.0004756888117753988, "clip_ratio/low_mean": 0.00031733140269807336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000793020223682106, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 2243.0, "completions/mean_length": 551.5379638671875, "completions/mean_terminated_length": 539.6304931640625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.643919510061242, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0279, "num_tokens": 662826115.0, "reward": 0.5714285969734192, "reward_std": 0.21428248286247253, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1139 }, { "clip_ratio/high_max": 0.0018040633385680849, "clip_ratio/high_mean": 0.0006041449305485003, "clip_ratio/low_mean": 0.00030515388198182336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009092988229895127, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2160.0, "completions/mean_length": 631.4319458007812, "completions/mean_terminated_length": 580.4246826171875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 10.653251676873724, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 663437206.0, "reward": 0.535714328289032, "reward_std": 0.23401953279972076, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1140 }, { "clip_ratio/high_max": 0.0017425021833332721, "clip_ratio/high_mean": 0.0005442006226985541, "clip_ratio/low_mean": 0.0003284790451516528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008726796668270254, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 656.2589721679688, "completions/mean_terminated_length": 593.7181396484375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 10.662583843686207, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 664043614.0, "reward": 0.527901828289032, "reward_std": 0.2386789470911026, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1141 }, { "clip_ratio/high_max": 0.0021527270846490865, "clip_ratio/high_mean": 0.0006683511567189271, "clip_ratio/low_mean": 0.00030635680445811886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009747079675435089, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 608.411865234375, "completions/mean_terminated_length": 561.0690307617188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.671916010498688, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 664634191.0, "reward": 0.606026828289032, "reward_std": 0.23033711314201355, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1142 }, { "clip_ratio/high_max": 0.001847786519647343, "clip_ratio/high_mean": 0.0005114170812703378, "clip_ratio/low_mean": 0.00032902276154800347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000840439839521423, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 599.5949096679688, "completions/mean_terminated_length": 536.0238647460938, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.68124817731117, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 665204764.0, "reward": 0.566964328289032, "reward_std": 0.18960639834403992, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1143 }, { "clip_ratio/high_max": 0.0016712147426005686, "clip_ratio/high_mean": 0.0005301716157646297, "clip_ratio/low_mean": 0.000316820767579884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008469923868688056, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2783.0, "completions/mean_length": 592.669677734375, "completions/mean_terminated_length": 553.128662109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.690580344123651, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 665784492.0, "reward": 0.5290178656578064, "reward_std": 0.212375670671463, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 1144 }, { "clip_ratio/high_max": 0.0014545907670253655, "clip_ratio/high_mean": 0.0003818193686129234, "clip_ratio/low_mean": 0.00033153243975903024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007133518174669007, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 611.1105346679688, "completions/mean_terminated_length": 535.6111450195312, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 10.699912510936134, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0174, "num_tokens": 666343279.0, "reward": 0.5245535969734192, "reward_std": 0.19050683081150055, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1145 }, { "clip_ratio/high_max": 0.001637159796700871, "clip_ratio/high_mean": 0.0005205864244999248, "clip_ratio/low_mean": 0.0003697998142797587, "clip_ratio/low_min": 1.2084299669368193e-05, "clip_ratio/region_mean": 0.0008903862335500889, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 592.7098388671875, "completions/mean_terminated_length": 565.1248779296875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.709244677748615, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 666927371.0, "reward": 0.53125, "reward_std": 0.2197069227695465, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1146 }, { "clip_ratio/high_max": 0.001656133903452428, "clip_ratio/high_mean": 0.0004848967168982199, "clip_ratio/low_mean": 0.0003100814160461596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007949781238494324, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 639.5792846679688, "completions/mean_terminated_length": 568.7186889648438, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 10.718576844561097, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 667516386.0, "reward": 0.5959821939468384, "reward_std": 0.2043369561433792, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1147 }, { "clip_ratio/high_max": 0.0013927572299508029, "clip_ratio/high_mean": 0.00039721748328247486, "clip_ratio/low_mean": 0.0003871739263558993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007843914254408446, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 604.599365234375, "completions/mean_terminated_length": 581.0618286132812, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.727909011373578, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 668119219.0, "reward": 0.5368303656578064, "reward_std": 0.2093709260225296, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1148 }, { "clip_ratio/high_max": 0.0015744132088002516, "clip_ratio/high_mean": 0.0004731255261276601, "clip_ratio/low_mean": 0.0003359877121056343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008091132385743549, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 622.997802734375, "completions/mean_terminated_length": 563.8660888671875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 10.73724117818606, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 668701513.0, "reward": 0.5803571939468384, "reward_std": 0.20546743273735046, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 1149 }, { "clip_ratio/high_max": 0.0016613937586953398, "clip_ratio/high_mean": 0.00046575758778999443, "clip_ratio/low_mean": 0.0003909868725600063, "clip_ratio/low_min": 1.093804712581914e-05, "clip_ratio/region_mean": 0.0008567444722302753, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3467.0, "completions/mean_length": 667.140625, "completions/mean_terminated_length": 584.8480224609375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.746573344998541, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 669304535.0, "reward": 0.5066964626312256, "reward_std": 0.24660933017730713, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1150 }, { "clip_ratio/high_max": 0.001383524305310857, "clip_ratio/high_mean": 0.00037995680236235785, "clip_ratio/low_mean": 0.0004186753594694892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007986321697899257, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 627.8984375, "completions/mean_terminated_length": 568.8502197265625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.755905511811024, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 669896212.0, "reward": 0.494419664144516, "reward_std": 0.20888124406337738, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1151 }, { "clip_ratio/high_max": 0.0018027637515842798, "clip_ratio/high_mean": 0.0005304419826188678, "clip_ratio/low_mean": 0.0002956257392270345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008260677177531761, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2323.0, "completions/mean_length": 621.5357666015625, "completions/mean_terminated_length": 566.385498046875, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 10.765237678623505, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0094, "num_tokens": 670489684.0, "reward": 0.5189732313156128, "reward_std": 0.23788337409496307, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1152 }, { "clip_ratio/high_max": 0.0015434565884788753, "clip_ratio/high_mean": 0.0005081853137198777, "clip_ratio/low_mean": 0.00028249833917470824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007906836594884226, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 587.1674194335938, "completions/mean_terminated_length": 535.5084838867188, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 10.774569845435988, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 671053954.0, "reward": 0.6104910969734192, "reward_std": 0.19287051260471344, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 1153 }, { "clip_ratio/high_max": 0.001792408020264702, "clip_ratio/high_mean": 0.0005367111339182884, "clip_ratio/low_mean": 0.00033960035409563716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008763114865359967, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3270.0, "completions/mean_length": 610.8515625, "completions/mean_terminated_length": 555.53173828125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 10.783902012248468, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 671632333.0, "reward": 0.5959821939468384, "reward_std": 0.20816698670387268, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1154 }, { "clip_ratio/high_max": 0.0015771444259371492, "clip_ratio/high_mean": 0.000503479648841676, "clip_ratio/low_mean": 0.00034037283899124304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008438524901066558, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 597.609375, "completions/mean_terminated_length": 558.1241455078125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 10.793234179060951, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.021, "num_tokens": 672214135.0, "reward": 0.5178571939468384, "reward_std": 0.2090253233909607, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1155 }, { "clip_ratio/high_max": 0.0013750391290159314, "clip_ratio/high_mean": 0.00043364562338865653, "clip_ratio/low_mean": 0.0004127840682031092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008464296993224707, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 1945.0, "completions/mean_length": 645.0100708007812, "completions/mean_terminated_length": 562.186279296875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.802566345873432, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 672796912.0, "reward": 0.5446428656578064, "reward_std": 0.2171047180891037, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1156 }, { "clip_ratio/high_max": 0.0015667758489144035, "clip_ratio/high_mean": 0.0004141048735846198, "clip_ratio/low_mean": 0.0003231997775401396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007373046614702616, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 631.8392944335938, "completions/mean_terminated_length": 560.820068359375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.811898512685914, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 673387832.0, "reward": 0.5457589626312256, "reward_std": 0.209066703915596, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 1157 }, { "clip_ratio/high_max": 0.0016655336548865307, "clip_ratio/high_mean": 0.0005172209932879923, "clip_ratio/low_mean": 0.00022447497963185015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007416959742840845, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2078.0, "completions/mean_length": 604.8616333007812, "completions/mean_terminated_length": 549.4467163085938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.821230679498395, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 673962228.0, "reward": 0.5602678656578064, "reward_std": 0.19982656836509705, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 1158 }, { "clip_ratio/high_max": 0.001596397483808687, "clip_ratio/high_mean": 0.0004775468332809396, "clip_ratio/low_mean": 0.00030511208478856133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007826589203432377, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3767.0, "completions/mean_length": 566.3114013671875, "completions/mean_terminated_length": 498.046630859375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 10.830562846310878, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 674485163.0, "reward": 0.6428571939468384, "reward_std": 0.20324109494686127, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1159 }, { "clip_ratio/high_max": 0.0016447261969005922, "clip_ratio/high_mean": 0.000513678448214705, "clip_ratio/low_mean": 0.00041539912967891723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009290775929002848, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 593.7734375, "completions/mean_terminated_length": 566.1968994140625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 10.83989501312336, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0168, "num_tokens": 675082320.0, "reward": 0.5323660969734192, "reward_std": 0.20996825397014618, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1160 }, { "clip_ratio/high_max": 0.0016843953844727366, "clip_ratio/high_mean": 0.00046381006325191265, "clip_ratio/low_mean": 0.00033338541311422887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007971954810273019, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 604.5167846679688, "completions/mean_terminated_length": 553.1132202148438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.849227179935841, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 675652767.0, "reward": 0.5524553656578064, "reward_std": 0.20682109892368317, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1161 }, { "clip_ratio/high_max": 0.00183062973064807, "clip_ratio/high_mean": 0.0005302630440837675, "clip_ratio/low_mean": 0.00037720782142969256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009074708586922497, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 610.3828125, "completions/mean_terminated_length": 567.0587768554688, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 10.858559346748324, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 676242238.0, "reward": 0.5055803656578064, "reward_std": 0.2456229329109192, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1162 }, { "clip_ratio/high_max": 0.0014446050290644052, "clip_ratio/high_mean": 0.0004043962151172309, "clip_ratio/low_mean": 0.00040507040330339805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008094666127362871, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 564.3605346679688, "completions/mean_terminated_length": 528.5264892578125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 10.867891513560805, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 676800721.0, "reward": 0.578125, "reward_std": 0.21327723562717438, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1163 }, { "clip_ratio/high_max": 0.0011761088244384155, "clip_ratio/high_mean": 0.0003668110516628076, "clip_ratio/low_mean": 0.00022661833645543084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005934293808422808, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3396.0, "completions/mean_length": 657.1629638671875, "completions/mean_terminated_length": 582.6613159179688, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 10.877223680373287, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.023, "num_tokens": 677394051.0, "reward": 0.5390625, "reward_std": 0.16183525323867798, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1164 }, { "clip_ratio/high_max": 0.001815230340071139, "clip_ratio/high_mean": 0.0006076487995869684, "clip_ratio/low_mean": 0.000338437375603462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009460861729166936, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 628.4989013671875, "completions/mean_terminated_length": 561.4368286132812, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 10.886555847185768, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 677972770.0, "reward": 0.6037946939468384, "reward_std": 0.22105281054973602, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938119411468506, "step": 1165 }, { "clip_ratio/high_max": 0.0016446989338874118, "clip_ratio/high_mean": 0.0005324632415977248, "clip_ratio/low_mean": 0.0003466120699613384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008790752990535111, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2077.0, "completions/mean_length": 602.2701416015625, "completions/mean_terminated_length": 562.8374633789062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.89588801399825, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 678559916.0, "reward": 0.5814732313156128, "reward_std": 0.23191125690937042, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1166 }, { "clip_ratio/high_max": 0.001686195442744065, "clip_ratio/high_mean": 0.0005248477732493484, "clip_ratio/low_mean": 0.00036475858155426977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008896063582142233, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 584.396240234375, "completions/mean_terminated_length": 552.7601318359375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.905220180810732, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 679138615.0, "reward": 0.606026828289032, "reward_std": 0.23090235888957977, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1167 }, { "clip_ratio/high_max": 0.0018442560667608632, "clip_ratio/high_mean": 0.0004919110244827607, "clip_ratio/low_mean": 0.00031940933422447415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008113203757602605, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2050.0, "completions/mean_length": 531.2098388671875, "completions/mean_terminated_length": 507.1775207519531, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 10.914552347623214, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 679681755.0, "reward": 0.5970982313156128, "reward_std": 0.18182942271232605, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075523018836975, "step": 1168 }, { "clip_ratio/high_max": 0.0016073361293820199, "clip_ratio/high_mean": 0.0005071012240023265, "clip_ratio/low_mean": 0.00041626721326792904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009233684304490453, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 698.6674194335938, "completions/mean_terminated_length": 625.0650024414062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.923884514435695, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 680316033.0, "reward": 0.5033482313156128, "reward_std": 0.2425856590270996, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1169 }, { "clip_ratio/high_max": 0.001973909056687262, "clip_ratio/high_mean": 0.0006501263105747057, "clip_ratio/low_mean": 0.0004083588496541779, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010584851579551469, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 581.9810791015625, "completions/mean_terminated_length": 526.2029418945312, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 10.933216681248178, "grad_norm": 0.146484375, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 680871392.0, "reward": 0.5613839626312256, "reward_std": 0.24243342876434326, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1170 }, { "clip_ratio/high_max": 0.0014305145341495518, "clip_ratio/high_mean": 0.00043259119070171437, "clip_ratio/low_mean": 0.00030472648995782947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007373176758846967, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 625.8984375, "completions/mean_terminated_length": 570.8174438476562, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 10.942548848060659, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 681471277.0, "reward": 0.4799107313156128, "reward_std": 0.18908463418483734, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1171 }, { "clip_ratio/high_max": 0.0014520787253786693, "clip_ratio/high_mean": 0.00040996120560521376, "clip_ratio/low_mean": 0.0002406684872084952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000650629697702243, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 685.1473388671875, "completions/mean_terminated_length": 615.2210083007812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.951881014873141, "grad_norm": 0.1083984375, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 682101825.0, "reward": 0.5234375, "reward_std": 0.17630618810653687, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1172 }, { "clip_ratio/high_max": 0.0014668763142253738, "clip_ratio/high_mean": 0.0003770719254134747, "clip_ratio/low_mean": 0.00043069329888112406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008077652282736381, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 678.1830444335938, "completions/mean_terminated_length": 623.9320068359375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 10.961213181685622, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.016, "num_tokens": 682733661.0, "reward": 0.5100446939468384, "reward_std": 0.2220708727836609, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1173 }, { "clip_ratio/high_max": 0.0015102527440831182, "clip_ratio/high_mean": 0.00045779168567605666, "clip_ratio/low_mean": 0.00034387840389626945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008016700894586393, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 633.3449096679688, "completions/mean_terminated_length": 598.2108154296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.970545348498105, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 683349690.0, "reward": 0.5100446939468384, "reward_std": 0.21722276508808136, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1174 }, { "clip_ratio/high_max": 0.0015930744812067132, "clip_ratio/high_mean": 0.0005445129036161234, "clip_ratio/low_mean": 0.0002570602872538075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008015732037165435, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2080.0, "completions/mean_length": 567.5614013671875, "completions/mean_terminated_length": 539.7784423828125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 10.979877515310585, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0077, "num_tokens": 683922553.0, "reward": 0.6004464626312256, "reward_std": 0.2145892083644867, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1175 }, { "clip_ratio/high_max": 0.00159230694953294, "clip_ratio/high_mean": 0.0004766566730722843, "clip_ratio/low_mean": 0.00037552160347331665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008521782829120639, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 661.2064819335938, "completions/mean_terminated_length": 558.5574951171875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.989209682123068, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 684498378.0, "reward": 0.574776828289032, "reward_std": 0.20925851166248322, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1176 }, { "clip_ratio/high_max": 0.0014878864712954964, "clip_ratio/high_mean": 0.0004385699053273129, "clip_ratio/low_mean": 0.0003012640701172131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007398339857900282, "completions/clipped_ratio": 0.011363636363636354, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 676.6278686523438, "completions/mean_terminated_length": 637.32470703125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 10.998541848935549, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 685119355.0, "reward": 0.4921875298023224, "reward_std": 0.22338652610778809, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1177 }, { "clip_ratio/high_max": 0.0018126432732969988, "clip_ratio/high_mean": 0.0004733995822334691, "clip_ratio/low_mean": 0.0003042564163706629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007776560069032712, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2209.0, "completions/mean_length": 646.6897583007812, "completions/mean_terminated_length": 591.9387817382812, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 11.009332166812483, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 685735141.0, "reward": 0.5446428656578064, "reward_std": 0.21429386734962463, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1178 }, { "clip_ratio/high_max": 0.0015300237100746017, "clip_ratio/high_mean": 0.0004948398907345108, "clip_ratio/low_mean": 0.0004086324597665225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00090347235709487, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 710.4788208007812, "completions/mean_terminated_length": 648.923828125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 11.018664333624963, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 686397666.0, "reward": 0.527901828289032, "reward_std": 0.24254217743873596, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1179 }, { "clip_ratio/high_max": 0.0016976731531030964, "clip_ratio/high_mean": 0.0005497295298937388, "clip_ratio/low_mean": 0.00038699891047144774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009367284392283182, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2289.0, "completions/mean_length": 623.2176513671875, "completions/mean_terminated_length": 539.870849609375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.027996500437446, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 686951421.0, "reward": 0.515625, "reward_std": 0.24130618572235107, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1180 }, { "clip_ratio/high_max": 0.001765901306498563, "clip_ratio/high_mean": 0.0005292653304422856, "clip_ratio/low_mean": 0.0004168298777358359, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009460951951041352, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 626.6975708007812, "completions/mean_terminated_length": 551.535888671875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.037328667249927, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 687521270.0, "reward": 0.5424107313156128, "reward_std": 0.2051643431186676, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1181 }, { "clip_ratio/high_max": 0.0013871519849999459, "clip_ratio/high_mean": 0.00043188820632167335, "clip_ratio/low_mean": 0.0003643261718480062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007962143845361425, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 653.2589721679688, "completions/mean_terminated_length": 590.6636352539062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.04666083406241, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 688127518.0, "reward": 0.5636160969734192, "reward_std": 0.2021559327840805, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1182 }, { "clip_ratio/high_max": 0.0020745518704643473, "clip_ratio/high_mean": 0.0006256531460167025, "clip_ratio/low_mean": 0.00028837112915880425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009140242773355567, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 617.2980346679688, "completions/mean_terminated_length": 554.048828125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.05599300087489, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 688704465.0, "reward": 0.5859375, "reward_std": 0.2099350392818451, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1183 }, { "clip_ratio/high_max": 0.001705239990769769, "clip_ratio/high_mean": 0.0005049621718171693, "clip_ratio/low_mean": 0.00036079768506169785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008657598427816993, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 668.5770263671875, "completions/mean_terminated_length": 590.3253173828125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 11.065325167687373, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 689317526.0, "reward": 0.5178571939468384, "reward_std": 0.23404881358146667, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1184 }, { "clip_ratio/high_max": 0.0017312664949713508, "clip_ratio/high_mean": 0.0005299104766436358, "clip_ratio/low_mean": 0.0003292298105179725, "clip_ratio/low_min": 8.933676326705609e-06, "clip_ratio/region_mean": 0.0008591402920501423, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3933.0, "completions/mean_length": 653.6328125, "completions/mean_terminated_length": 583.0603637695312, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 11.074657334499854, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0123, "num_tokens": 689919005.0, "reward": 0.5, "reward_std": 0.23052442073822021, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1185 }, { "clip_ratio/high_max": 0.0015408249964821152, "clip_ratio/high_mean": 0.0004702627782080526, "clip_ratio/low_mean": 0.00037055292841614573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008408156918449095, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 644.9710083007812, "completions/mean_terminated_length": 590.1927490234375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 11.083989501312336, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 690534483.0, "reward": 0.5212053656578064, "reward_std": 0.21222272515296936, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1186 }, { "clip_ratio/high_max": 0.0018965411545650568, "clip_ratio/high_mean": 0.0004936661780448048, "clip_ratio/low_mean": 0.0003178008994382253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008114670790746459, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 613.9263916015625, "completions/mean_terminated_length": 554.6401977539062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.093321668124817, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 691116193.0, "reward": 0.59375, "reward_std": 0.19407722353935242, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1187 }, { "clip_ratio/high_max": 0.0014303534935606876, "clip_ratio/high_mean": 0.00045155592238188547, "clip_ratio/low_mean": 0.00034358008974777476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007951360030347132, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 600.8761596679688, "completions/mean_terminated_length": 569.3885498046875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 11.1026538349373, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 691721818.0, "reward": 0.5334821939468384, "reward_std": 0.2241317480802536, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1188 }, { "clip_ratio/high_max": 0.0016902213592402404, "clip_ratio/high_mean": 0.0004496746387303574, "clip_ratio/low_mean": 0.00028331191299457714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007329865593419527, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2120.0, "completions/mean_length": 563.6785888671875, "completions/mean_terminated_length": 527.837646484375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.11198600174978, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 692265354.0, "reward": 0.5881696939468384, "reward_std": 0.19948594272136688, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 1189 }, { "clip_ratio/high_max": 0.0020211618048051605, "clip_ratio/high_mean": 0.0005771383284809417, "clip_ratio/low_mean": 0.0003027978863201497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008799362194622518, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 663.1707763671875, "completions/mean_terminated_length": 580.7828369140625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.121318168562263, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0201, "num_tokens": 692868315.0, "reward": 0.53125, "reward_std": 0.19047221541404724, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1190 }, { "clip_ratio/high_max": 0.0014844330980849918, "clip_ratio/high_mean": 0.0004902599914657912, "clip_ratio/low_mean": 0.0003573382582544582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008475982481286337, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 614.3035888671875, "completions/mean_terminated_length": 559.03857421875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 11.130650335374744, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 693438459.0, "reward": 0.559151828289032, "reward_std": 0.20636573433876038, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1191 }, { "clip_ratio/high_max": 0.0015765076191200933, "clip_ratio/high_mean": 0.00040870388295388693, "clip_ratio/low_mean": 0.00033751574142115714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000746219618122268, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 646.7957763671875, "completions/mean_terminated_length": 596.0147094726562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 11.139982502187227, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 694067772.0, "reward": 0.478794664144516, "reward_std": 0.19764302670955658, "rewards/verify_math_reward/mean": 0.4787946343421936, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1192 }, { "clip_ratio/high_max": 0.00181157861425163, "clip_ratio/high_mean": 0.0005747752998104261, "clip_ratio/low_mean": 0.00035434867038475204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000929123970308865, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 614.7589721679688, "completions/mean_terminated_length": 567.5022583007812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.149314668999708, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 694653468.0, "reward": 0.5524553656578064, "reward_std": 0.24563290178775787, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1193 }, { "clip_ratio/high_max": 0.001964186062650697, "clip_ratio/high_mean": 0.0005575590637363348, "clip_ratio/low_mean": 0.00041727745838215924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009748365491759614, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3598.0, "completions/mean_length": 626.2511596679688, "completions/mean_terminated_length": 547.0330810546875, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 11.15864683581219, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 695217165.0, "reward": 0.5602678656578064, "reward_std": 0.23124048113822937, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 1194 }, { "clip_ratio/high_max": 0.0016791548505352694, "clip_ratio/high_mean": 0.00042282035997232015, "clip_ratio/low_mean": 0.0003706209006395511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007934412560643977, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 589.8817138671875, "completions/mean_terminated_length": 522.0728149414062, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 11.167979002624673, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 695778035.0, "reward": 0.5602678656578064, "reward_std": 0.19287119805812836, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1195 }, { "clip_ratio/high_max": 0.001689097861344635, "clip_ratio/high_mean": 0.0005339278081919474, "clip_ratio/low_mean": 0.0003026856468295591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008366134429707017, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 657.9408569335938, "completions/mean_terminated_length": 575.4274291992188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 11.177311169437154, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 696360278.0, "reward": 0.5546875, "reward_std": 0.23409047722816467, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1196 }, { "clip_ratio/high_max": 0.001657502443777048, "clip_ratio/high_mean": 0.0004737180547635944, "clip_ratio/low_mean": 0.00027587881493218447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007495968684452237, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 558.896240234375, "completions/mean_terminated_length": 523.0067138671875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 11.186643336249636, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 696910841.0, "reward": 0.582589328289032, "reward_std": 0.19918467104434967, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1197 }, { "clip_ratio/high_max": 0.0012414027651175275, "clip_ratio/high_mean": 0.0003762090127565898, "clip_ratio/low_mean": 0.00029280607691362093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006690150892154634, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 619.1027221679688, "completions/mean_terminated_length": 567.9139404296875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 11.195975503062117, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 697496725.0, "reward": 0.546875, "reward_std": 0.20531155169010162, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1198 }, { "clip_ratio/high_max": 0.0015222707679640735, "clip_ratio/high_mean": 0.00043158240328011743, "clip_ratio/low_mean": 0.00021781701445888757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006493994064840081, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 600.0145263671875, "completions/mean_terminated_length": 536.4511108398438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.2053076698746, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 698064514.0, "reward": 0.5602678656578064, "reward_std": 0.18013553321361542, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1199 }, { "clip_ratio/high_max": 0.0013311835728018195, "clip_ratio/high_mean": 0.0003712672070150802, "clip_ratio/low_mean": 0.00038878351938365086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007600507132110579, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 607.3527221679688, "completions/mean_terminated_length": 567.9774169921875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 11.21463983668708, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0178, "num_tokens": 698662950.0, "reward": 0.5658482313156128, "reward_std": 0.2123749703168869, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1200 }, { "clip_ratio/high_max": 0.0015760056467115646, "clip_ratio/high_mean": 0.00045181570465047116, "clip_ratio/low_mean": 0.0002670606072570081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007188763220256078, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 626.4799194335938, "completions/mean_terminated_length": 583.35595703125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.223972003499563, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.018, "num_tokens": 699261916.0, "reward": 0.5524553656578064, "reward_std": 0.1813715547323227, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1201 }, { "clip_ratio/high_max": 0.001806717382351053, "clip_ratio/high_mean": 0.0005310341794029227, "clip_ratio/low_mean": 0.0002920879667271947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008231221463574911, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3179.0, "completions/mean_length": 632.8705444335938, "completions/mean_terminated_length": 597.7316284179688, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 11.233304170312044, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 699879192.0, "reward": 0.5848214626312256, "reward_std": 0.19805558025836945, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1202 }, { "clip_ratio/high_max": 0.0014581408777303295, "clip_ratio/high_mean": 0.0004344063413554977, "clip_ratio/low_mean": 0.0003344224043075883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007688287473683886, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 593.4252319335938, "completions/mean_terminated_length": 553.8927612304688, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 11.242636337124527, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 700456373.0, "reward": 0.6004464626312256, "reward_std": 0.1965913474559784, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1203 }, { "clip_ratio/high_max": 0.001748106321429077, "clip_ratio/high_mean": 0.0005243480889021157, "clip_ratio/low_mean": 0.0003121826123333449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000836530721244344, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3366.0, "completions/mean_length": 616.2377319335938, "completions/mean_terminated_length": 565.0067749023438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 11.251968503937007, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0154, "num_tokens": 701048426.0, "reward": 0.625, "reward_std": 0.20151470601558685, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1204 }, { "clip_ratio/high_max": 0.0017423929130018223, "clip_ratio/high_mean": 0.0004999095231141837, "clip_ratio/low_mean": 0.0003165828585451891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000816492373814981, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3488.0, "completions/mean_length": 660.7935791015625, "completions/mean_terminated_length": 582.3641357421875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 11.26130067074949, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 701668137.0, "reward": 0.4810267984867096, "reward_std": 0.20098592340946198, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991899728775024, "step": 1205 }, { "clip_ratio/high_max": 0.0017148759006886394, "clip_ratio/high_mean": 0.00047484214167070604, "clip_ratio/low_mean": 0.00037459873703937774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008494408657497843, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2263.0, "completions/mean_length": 562.4375, "completions/mean_terminated_length": 498.1908874511719, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 11.27063283756197, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 702197481.0, "reward": 0.5691964626312256, "reward_std": 0.21139857172966003, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1206 }, { "clip_ratio/high_max": 0.0014666623719676863, "clip_ratio/high_mean": 0.0004838364152419672, "clip_ratio/low_mean": 0.00034066045623148966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008244968639701256, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3976.0, "completions/mean_length": 586.6763916015625, "completions/mean_terminated_length": 559.0438842773438, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.279965004374453, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 702780599.0, "reward": 0.5636160969734192, "reward_std": 0.21676772832870483, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1207 }, { "clip_ratio/high_max": 0.0014182000450091437, "clip_ratio/high_mean": 0.000344319263831494, "clip_ratio/low_mean": 0.00031040950091210107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006547287696321291, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 596.328125, "completions/mean_terminated_length": 548.8212890625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.289297171186934, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 703358725.0, "reward": 0.5569196939468384, "reward_std": 0.15409964323043823, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 1208 }, { "clip_ratio/high_max": 0.0018192526422353694, "clip_ratio/high_mean": 0.0005398614571276994, "clip_ratio/low_mean": 0.00029727401306445245, "clip_ratio/low_min": 1.1015156815119553e-05, "clip_ratio/region_mean": 0.0008371354679184151, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3090.0, "completions/mean_length": 536.396240234375, "completions/mean_terminated_length": 504.3277282714844, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 11.298629337999417, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 703894256.0, "reward": 0.5703125, "reward_std": 0.19340254366397858, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1209 }, { "clip_ratio/high_max": 0.0016637576281937072, "clip_ratio/high_mean": 0.0005448425899885478, "clip_ratio/low_mean": 0.0003061326644910878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008509752551617566, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 532.28125, "completions/mean_terminated_length": 508.2561950683594, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 11.307961504811898, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0109, "num_tokens": 704425860.0, "reward": 0.6774553656578064, "reward_std": 0.19835910201072693, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1210 }, { "clip_ratio/high_max": 0.0015508226497331634, "clip_ratio/high_mean": 0.0004395264149934519, "clip_ratio/low_mean": 0.00027341181407791737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007129382206585433, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 665.7611694335938, "completions/mean_terminated_length": 623.1254272460938, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 11.31729367162438, "grad_norm": 0.1123046875, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 705077542.0, "reward": 0.5, "reward_std": 0.18517020344734192, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1211 }, { "clip_ratio/high_max": 0.0018860916006815387, "clip_ratio/high_mean": 0.0005438715411401063, "clip_ratio/low_mean": 0.00027447828165350074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008183498266589595, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 567.6752319335938, "completions/mean_terminated_length": 539.8931884765625, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 11.326625838436861, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 705644011.0, "reward": 0.5848214626312256, "reward_std": 0.20298148691654205, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1212 }, { "clip_ratio/high_max": 0.0018231462363473838, "clip_ratio/high_mean": 0.0006360889037750894, "clip_ratio/low_mean": 0.0002481379181062948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008842268271109788, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3642.0, "completions/mean_length": 560.6194458007812, "completions/mean_terminated_length": 524.7474365234375, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 11.335958005249344, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 706186790.0, "reward": 0.613839328289032, "reward_std": 0.2141006737947464, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1213 }, { "clip_ratio/high_max": 0.001437395154425758, "clip_ratio/high_mean": 0.0004377646807824931, "clip_ratio/low_mean": 0.0004015669476302719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008393316211368074, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2824.0, "completions/mean_length": 574.2444458007812, "completions/mean_terminated_length": 530.47119140625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 11.345290172061826, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 706744473.0, "reward": 0.53125, "reward_std": 0.2232327163219452, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1214 }, { "clip_ratio/high_max": 0.0016219537110373494, "clip_ratio/high_mean": 0.0004899214943634433, "clip_ratio/low_mean": 0.00033940166224510904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008293231612697127, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 620.4152221679688, "completions/mean_terminated_length": 569.2457275390625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 11.354622338874307, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 707341573.0, "reward": 0.5792410969734192, "reward_std": 0.19798073172569275, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1215 }, { "clip_ratio/high_max": 0.0016301512459904188, "clip_ratio/high_mean": 0.0004961456893397553, "clip_ratio/low_mean": 0.0002186143169637944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000714760011760518, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3414.0, "completions/mean_length": 641.5346069335938, "completions/mean_terminated_length": 562.66552734375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 11.36395450568679, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 707916292.0, "reward": 0.5491071939468384, "reward_std": 0.19753523170948029, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1216 }, { "clip_ratio/high_max": 0.0016524278253200464, "clip_ratio/high_mean": 0.00047501923427262227, "clip_ratio/low_mean": 0.0003669948181368454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008420140538873966, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 575.849365234375, "completions/mean_terminated_length": 540.1318969726562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 11.37328667249927, "grad_norm": 0.1474609375, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 708495637.0, "reward": 0.5535714626312256, "reward_std": 0.22109587490558624, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 1217 }, { "clip_ratio/high_max": 0.0015316274593715207, "clip_ratio/high_mean": 0.0005121717392739811, "clip_ratio/low_mean": 0.00032788698342756106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008400587203141185, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3477.0, "completions/mean_length": 625.1506958007812, "completions/mean_terminated_length": 578.0350952148438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 11.382618839311753, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 709101636.0, "reward": 0.5881696939468384, "reward_std": 0.21808859705924988, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 1218 }, { "clip_ratio/high_max": 0.0015591038190905238, "clip_ratio/high_mean": 0.0004793016846633691, "clip_ratio/low_mean": 0.00037911617471309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008584178658566088, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 599.328125, "completions/mean_terminated_length": 531.701904296875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 11.391951006124234, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 709659026.0, "reward": 0.515625, "reward_std": 0.22007529437541962, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1219 }, { "clip_ratio/high_max": 0.001727671418848331, "clip_ratio/high_mean": 0.0005489591512741754, "clip_ratio/low_mean": 0.0003577263383931495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000906685498648585, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 567.5960083007812, "completions/mean_terminated_length": 543.8090209960938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 11.401283172936717, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 710230920.0, "reward": 0.5680803656578064, "reward_std": 0.22819000482559204, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1220 }, { "clip_ratio/high_max": 0.0017083319780795136, "clip_ratio/high_mean": 0.0005138730077760556, "clip_ratio/low_mean": 0.0003482699106598375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008621429242339218, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3697.0, "completions/mean_length": 637.671875, "completions/mean_terminated_length": 578.7900390625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 11.410615339749198, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 710823234.0, "reward": 0.4955357313156128, "reward_std": 0.23082607984542847, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 1221 }, { "clip_ratio/high_max": 0.0018757069447019603, "clip_ratio/high_mean": 0.000587552998240426, "clip_ratio/low_mean": 0.0003630494585422639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009506024634902133, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2193.0, "completions/mean_length": 603.5223388671875, "completions/mean_terminated_length": 564.1038208007812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.41994750656168, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.015, "num_tokens": 711410422.0, "reward": 0.566964328289032, "reward_std": 0.2346218079328537, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1222 }, { "clip_ratio/high_max": 0.0016131734191731084, "clip_ratio/high_mean": 0.0005183955978509402, "clip_ratio/low_mean": 0.00035145649144396884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008698520741745597, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 698.1563110351562, "completions/mean_terminated_length": 632.44140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 11.429279673374161, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 712046442.0, "reward": 0.4966517984867096, "reward_std": 0.23724789917469025, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1223 }, { "clip_ratio/high_max": 0.0016537298324692529, "clip_ratio/high_mean": 0.0005164458686977014, "clip_ratio/low_mean": 0.00037962824217174784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008960741015471285, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2107.0, "completions/mean_length": 531.1517944335938, "completions/mean_terminated_length": 503.0821228027344, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 11.438611840186644, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 712577826.0, "reward": 0.6116071939468384, "reward_std": 0.19783805310726166, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1224 }, { "clip_ratio/high_max": 0.001743421961691638, "clip_ratio/high_mean": 0.0005555848567837529, "clip_ratio/low_mean": 0.0003264261929416534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008820110333545017, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3422.0, "completions/mean_length": 610.7455444335938, "completions/mean_terminated_length": 571.4085693359375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 11.447944006999125, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 713176054.0, "reward": 0.5412946939468384, "reward_std": 0.2167356312274933, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1225 }, { "clip_ratio/high_max": 0.0014606723916585906, "clip_ratio/high_mean": 0.0004765461920896996, "clip_ratio/low_mean": 0.00041098473593592644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008875309404174914, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1936.0, "completions/mean_length": 564.921875, "completions/mean_terminated_length": 533.1103515625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 11.457276173811607, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 713725872.0, "reward": 0.5892857313156128, "reward_std": 0.2180815488100052, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1226 }, { "clip_ratio/high_max": 0.0020563315320032416, "clip_ratio/high_mean": 0.0006233833271380718, "clip_ratio/low_mean": 0.00030864210191339225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009320254212070722, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 575.4642944335938, "completions/mean_terminated_length": 531.7062377929688, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 11.466608340624088, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 714278440.0, "reward": 0.5803571939468384, "reward_std": 0.20400065183639526, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1227 }, { "clip_ratio/high_max": 0.0015116178656171542, "clip_ratio/high_mean": 0.00046261808665803983, "clip_ratio/low_mean": 0.0003711675785780244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008337856588696013, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 615.4576416015625, "completions/mean_terminated_length": 568.21044921875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.47594050743657, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 714865338.0, "reward": 0.5569196939468384, "reward_std": 0.22699564695358276, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1228 }, { "clip_ratio/high_max": 0.0015121131345949834, "clip_ratio/high_mean": 0.0004476012652503414, "clip_ratio/low_mean": 0.00033094190712290583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007785431607771898, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3422.0, "completions/mean_length": 591.7154541015625, "completions/mean_terminated_length": 515.7958984375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 11.485272674249051, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 715389763.0, "reward": 0.5770089626312256, "reward_std": 0.19535532593727112, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1229 }, { "clip_ratio/high_max": 0.0016218743603531038, "clip_ratio/high_mean": 0.0005120393584547855, "clip_ratio/low_mean": 0.0004929801207254059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010050194896393805, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 632.5245971679688, "completions/mean_terminated_length": 565.5403442382812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.494604841061534, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0166, "num_tokens": 715978729.0, "reward": 0.5245535969734192, "reward_std": 0.24472180008888245, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 1230 }, { "clip_ratio/high_max": 0.0016897195973797352, "clip_ratio/high_mean": 0.00047367160982503265, "clip_ratio/low_mean": 0.00027815549071874557, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007518271086155437, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3108.0, "completions/mean_length": 634.296875, "completions/mean_terminated_length": 571.3568115234375, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 11.503937007874015, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 716574787.0, "reward": 0.5055803656578064, "reward_std": 0.21022644639015198, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1231 }, { "clip_ratio/high_max": 0.001779398171493085, "clip_ratio/high_mean": 0.000525535028828017, "clip_ratio/low_mean": 0.0003985766495588905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009241116840712493, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 598.2890625, "completions/mean_terminated_length": 558.8115234375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 11.513269174686497, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 717161190.0, "reward": 0.5290178656578064, "reward_std": 0.2320656180381775, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1232 }, { "clip_ratio/high_max": 0.0019015505331481108, "clip_ratio/high_mean": 0.0005253872518551361, "clip_ratio/low_mean": 0.00030738215093606414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008327694104082184, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 596.786865234375, "completions/mean_terminated_length": 537.2088623046875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 11.52260134149898, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 717716911.0, "reward": 0.582589328289032, "reward_std": 0.1997230499982834, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1233 }, { "clip_ratio/high_max": 0.001570004318637075, "clip_ratio/high_mean": 0.0004583853195754273, "clip_ratio/low_mean": 0.0003396300830900145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007980154096003389, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3448.0, "completions/mean_length": 658.271240234375, "completions/mean_terminated_length": 559.5993041992188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.531933508311461, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 718295802.0, "reward": 0.5479910969734192, "reward_std": 0.22251734137535095, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796947836875916, "step": 1234 }, { "clip_ratio/high_max": 0.0015217493146337802, "clip_ratio/high_mean": 0.00048210438262685784, "clip_ratio/low_mean": 0.00033898701758516836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008210913938455633, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 686.388427734375, "completions/mean_terminated_length": 612.5199584960938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 11.541265675123944, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 718921110.0, "reward": 0.4955357313156128, "reward_std": 0.2175191193819046, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 1235 }, { "clip_ratio/high_max": 0.001625314975171932, "clip_ratio/high_mean": 0.0005147203451087989, "clip_ratio/low_mean": 0.0003135977467536577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008283180973194249, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 622.138427734375, "completions/mean_terminated_length": 566.9977416992188, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 11.550597841936424, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 719505434.0, "reward": 0.5837053656578064, "reward_std": 0.19527865946292877, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1236 }, { "clip_ratio/high_max": 0.001830635885198717, "clip_ratio/high_mean": 0.0005781422328254848, "clip_ratio/low_mean": 0.0004298345254483138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010079767635033932, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3734.0, "completions/mean_length": 593.9877319335938, "completions/mean_terminated_length": 546.4490966796875, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 11.559930008748907, "grad_norm": 0.1455078125, "learning_rate": 1e-06, "loss": 0.0252, "num_tokens": 720093543.0, "reward": 0.5569196939468384, "reward_std": 0.24156899750232697, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 1237 }, { "clip_ratio/high_max": 0.001967149275515112, "clip_ratio/high_mean": 0.0005794478338430054, "clip_ratio/low_mean": 0.00040232166224996035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009817694872253924, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 630.9955444335938, "completions/mean_terminated_length": 587.9276733398438, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 11.569262175561388, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 720702059.0, "reward": 0.53125, "reward_std": 0.26404082775115967, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1238 }, { "clip_ratio/high_max": 0.0022166906601341907, "clip_ratio/high_mean": 0.0006844085405646183, "clip_ratio/low_mean": 0.0003884082661897992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010728167962952284, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2134.0, "completions/mean_length": 561.8973388671875, "completions/mean_terminated_length": 526.038330078125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.57859434237387, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 721256103.0, "reward": 0.621651828289032, "reward_std": 0.22079460322856903, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 1239 }, { "clip_ratio/high_max": 0.0014821816630501417, "clip_ratio/high_mean": 0.0004229704757108266, "clip_ratio/low_mean": 0.00023618010527570732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006591505796222918, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 606.015625, "completions/mean_terminated_length": 550.6190795898438, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 11.587926509186351, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 721836101.0, "reward": 0.5401785969734192, "reward_std": 0.17551524937152863, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1240 }, { "clip_ratio/high_max": 0.0016147985652423813, "clip_ratio/high_mean": 0.0004946439746618125, "clip_ratio/low_mean": 0.0003169257595345698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008115697264656774, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 636.3214721679688, "completions/mean_terminated_length": 565.3941040039062, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 11.597258675998834, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 722437685.0, "reward": 0.5100446939468384, "reward_std": 0.19430406391620636, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1241 }, { "clip_ratio/high_max": 0.001336024504780653, "clip_ratio/high_mean": 0.0003556971034868184, "clip_ratio/low_mean": 0.00037930445262190915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007350015512201935, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 628.1473388671875, "completions/mean_terminated_length": 577.0917358398438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.606590842811315, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 723049737.0, "reward": 0.4765625298023224, "reward_std": 0.19456368684768677, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1242 }, { "clip_ratio/high_max": 0.0015667741399738588, "clip_ratio/high_mean": 0.0004944293539210776, "clip_ratio/low_mean": 0.0002827515141916592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007771808786856127, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1985.0, "completions/mean_length": 623.9085083007812, "completions/mean_terminated_length": 580.7525634765625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.615923009623797, "grad_norm": 0.11279296875, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 723652263.0, "reward": 0.559151828289032, "reward_std": 0.2024155557155609, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1243 }, { "clip_ratio/high_max": 0.0016037574614529149, "clip_ratio/high_mean": 0.0004749589256789477, "clip_ratio/low_mean": 0.00030728922411071835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007822481584298657, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 580.1808471679688, "completions/mean_terminated_length": 556.4786376953125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 11.625255176436278, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 724235737.0, "reward": 0.6227678656578064, "reward_std": 0.18648359179496765, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1244 }, { "clip_ratio/high_max": 0.0015045660693431273, "clip_ratio/high_mean": 0.00046128759026942134, "clip_ratio/low_mean": 0.00039577127654411015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008570588602196949, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 643.0848388671875, "completions/mean_terminated_length": 560.21484375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 11.63458734324876, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 724813597.0, "reward": 0.5558035969734192, "reward_std": 0.20989085733890533, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1245 }, { "clip_ratio/high_max": 0.0018296742314305448, "clip_ratio/high_mean": 0.0005302263004978158, "clip_ratio/low_mean": 0.000352895020341748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008831213299345109, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 625.333740234375, "completions/mean_terminated_length": 562.2306518554688, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 11.643919510061242, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 725396704.0, "reward": 0.535714328289032, "reward_std": 0.20174476504325867, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1246 }, { "clip_ratio/high_max": 0.0016752165302023059, "clip_ratio/high_mean": 0.0004810842406186566, "clip_ratio/low_mean": 0.00031510455107763846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007961887913552346, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 587.296875, "completions/mean_terminated_length": 531.6032104492188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 11.653251676873724, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0103, "num_tokens": 725954762.0, "reward": 0.578125, "reward_std": 0.21605345606803894, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1247 }, { "clip_ratio/high_max": 0.0018545566108514322, "clip_ratio/high_mean": 0.0005846017716066854, "clip_ratio/low_mean": 0.000446058412308048, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010306601780030178, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3776.0, "completions/mean_length": 621.5703125, "completions/mean_terminated_length": 554.374267578125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.662583843686207, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 726522169.0, "reward": 0.5770089626312256, "reward_std": 0.23488323390483856, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1248 }, { "clip_ratio/high_max": 0.001839515312894946, "clip_ratio/high_mean": 0.0005849610543009476, "clip_ratio/low_mean": 0.00047545505958623835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001060416106156481, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 617.1373291015625, "completions/mean_terminated_length": 561.917236328125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 11.671916010498688, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 727102364.0, "reward": 0.543526828289032, "reward_std": 0.24833638966083527, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1249 }, { "clip_ratio/high_max": 0.00165630233095726, "clip_ratio/high_mean": 0.0005054166886111489, "clip_ratio/low_mean": 0.0004257477132796339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009311644053013879, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 632.325927734375, "completions/mean_terminated_length": 557.2861938476562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.68124817731117, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 727682776.0, "reward": 0.5167410969734192, "reward_std": 0.22480645775794983, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1250 }, { "clip_ratio/high_max": 0.0013985980440338608, "clip_ratio/high_mean": 0.0003993640550561395, "clip_ratio/low_mean": 0.0003613221728073768, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007606862300235662, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 640.7824096679688, "completions/mean_terminated_length": 605.7237548828125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 11.690580344123651, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 728312045.0, "reward": 0.5111607313156128, "reward_std": 0.20102868974208832, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1251 }, { "clip_ratio/high_max": 0.0018681076990105794, "clip_ratio/high_mean": 0.0005925826236534704, "clip_ratio/low_mean": 0.00031510629719377903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009076889082280104, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2492.0, "completions/mean_length": 592.7734375, "completions/mean_terminated_length": 549.2305297851562, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 11.699912510936134, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 728894554.0, "reward": 0.5714285969734192, "reward_std": 0.21365560591220856, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1252 }, { "clip_ratio/high_max": 0.0016980385498754913, "clip_ratio/high_mean": 0.0005554409699470853, "clip_ratio/low_mean": 0.0003138662875699083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008693072650203248, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3484.0, "completions/mean_length": 629.7288208007812, "completions/mean_terminated_length": 578.6964721679688, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 11.709244677748615, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 729492471.0, "reward": 0.5837053656578064, "reward_std": 0.2323242574930191, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1253 }, { "clip_ratio/high_max": 0.0015078579999681097, "clip_ratio/high_mean": 0.0004032950316741335, "clip_ratio/low_mean": 0.000326658735843921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007299537505787157, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 623.310302734375, "completions/mean_terminated_length": 564.1838989257812, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 11.718576844561097, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 730079109.0, "reward": 0.5223214626312256, "reward_std": 0.21421831846237183, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1254 }, { "clip_ratio/high_max": 0.0017146104437415488, "clip_ratio/high_mean": 0.0005236497797795892, "clip_ratio/low_mean": 0.0003831217946981269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009067715918718022, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 661.4944458007812, "completions/mean_terminated_length": 587.086669921875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 11.727909011373578, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 730689096.0, "reward": 0.551339328289032, "reward_std": 0.23619845509529114, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1255 }, { "clip_ratio/high_max": 0.0017623059266043128, "clip_ratio/high_mean": 0.0005050417953498254, "clip_ratio/low_mean": 0.00023900017436062626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007440419644808571, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3739.0, "completions/mean_length": 624.341552734375, "completions/mean_terminated_length": 557.1990966796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 11.73724117818606, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 731270906.0, "reward": 0.582589328289032, "reward_std": 0.16728995740413666, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1256 }, { "clip_ratio/high_max": 0.0017931064594449708, "clip_ratio/high_mean": 0.0005195500677928067, "clip_ratio/low_mean": 0.0002956146066708243, "clip_ratio/low_min": 1.3975849469716195e-05, "clip_ratio/region_mean": 0.0008151646707119653, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2304.0, "completions/mean_length": 604.9799194335938, "completions/mean_terminated_length": 565.577880859375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 11.746573344998541, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 731862856.0, "reward": 0.546875, "reward_std": 0.19223180413246155, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1257 }, { "clip_ratio/high_max": 0.0016030209390009986, "clip_ratio/high_mean": 0.0004903168048713269, "clip_ratio/low_mean": 0.00039110172519940534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008814185353003268, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 649.2545166015625, "completions/mean_terminated_length": 550.32373046875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 11.755905511811024, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 732437124.0, "reward": 0.4765625298023224, "reward_std": 0.23281168937683105, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1258 }, { "clip_ratio/high_max": 0.0018444312427163823, "clip_ratio/high_mean": 0.0006008650298099383, "clip_ratio/low_mean": 0.00036294881215326313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009638138421905751, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 654.8828125, "completions/mean_terminated_length": 556.1136474609375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.765237678623505, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 733012931.0, "reward": 0.5245535969734192, "reward_std": 0.22263678908348083, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 1259 }, { "clip_ratio/high_max": 0.0015400391521325218, "clip_ratio/high_mean": 0.00044522428856907936, "clip_ratio/low_mean": 0.00026891370066550735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007141379853692342, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 538.982177734375, "completions/mean_terminated_length": 527.032470703125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.774569845435988, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 733567811.0, "reward": 0.609375, "reward_std": 0.18840177357196808, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 1260 }, { "clip_ratio/high_max": 0.0014853214934191783, "clip_ratio/high_mean": 0.0004668172549600058, "clip_ratio/low_mean": 0.0003691930512559338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008360102988262952, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 563.6986694335938, "completions/mean_terminated_length": 519.7943725585938, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 11.783902012248468, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 734110885.0, "reward": 0.598214328289032, "reward_std": 0.19276243448257446, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1261 }, { "clip_ratio/high_max": 0.0015220143450278556, "clip_ratio/high_mean": 0.0004234905613884621, "clip_ratio/low_mean": 0.00032890618831515894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007523967351517058, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 596.0892944335938, "completions/mean_terminated_length": 536.4994506835938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 11.793234179060951, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0105, "num_tokens": 734669845.0, "reward": 0.6037946939468384, "reward_std": 0.1965906322002411, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1262 }, { "clip_ratio/high_max": 0.0017434260353184072, "clip_ratio/high_mean": 0.00047081311981855833, "clip_ratio/low_mean": 0.00035179688006792276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008226099962485023, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3485.0, "completions/mean_length": 663.1283569335938, "completions/mean_terminated_length": 592.7506103515625, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 11.802566345873432, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 735276000.0, "reward": 0.5167410969734192, "reward_std": 0.20880597829818726, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1263 }, { "clip_ratio/high_max": 0.0015636055650247727, "clip_ratio/high_mean": 0.00051671839150913, "clip_ratio/low_mean": 0.0003458441292423231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008625625368949841, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 615.0592041015625, "completions/mean_terminated_length": 563.8108520507812, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 11.811898512685914, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0175, "num_tokens": 735860925.0, "reward": 0.5892857313156128, "reward_std": 0.24029332399368286, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1264 }, { "clip_ratio/high_max": 0.0014206711366568925, "clip_ratio/high_mean": 0.0003940347921798093, "clip_ratio/low_mean": 0.00027018483649499103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00066421962401364, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 659.5256958007812, "completions/mean_terminated_length": 593.063720703125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 11.821230679498395, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 736470892.0, "reward": 0.559151828289032, "reward_std": 0.19114412367343903, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1265 }, { "clip_ratio/high_max": 0.0014103773528404417, "clip_ratio/high_mean": 0.00042180658181223407, "clip_ratio/low_mean": 0.0004090827937943686, "clip_ratio/low_min": 1.2098334991605952e-05, "clip_ratio/region_mean": 0.0008308893839057419, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 632.0926513671875, "completions/mean_terminated_length": 581.0950927734375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 11.830562846310878, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 737067039.0, "reward": 0.5, "reward_std": 0.22218288481235504, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1266 }, { "clip_ratio/high_max": 0.0014555100688085076, "clip_ratio/high_mean": 0.0004522725402011929, "clip_ratio/low_mean": 0.0003380375752612963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007903101081865316, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 610.6652221679688, "completions/mean_terminated_length": 559.3521728515625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 11.83989501312336, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 737653339.0, "reward": 0.5758928656578064, "reward_std": 0.202602818608284, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1267 }, { "clip_ratio/high_max": 0.001670739199653326, "clip_ratio/high_mean": 0.0004310596360710406, "clip_ratio/low_mean": 0.00029482788818313566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007258875193656422, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 591.5324096679688, "completions/mean_terminated_length": 531.8649291992188, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 11.849227179935841, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 738224960.0, "reward": 0.5446428656578064, "reward_std": 0.18261782824993134, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1268 }, { "clip_ratio/high_max": 0.0018886373818531865, "clip_ratio/high_mean": 0.0006127497381385183, "clip_ratio/low_mean": 0.0003710556979967805, "clip_ratio/low_min": 1.0192433364863973e-05, "clip_ratio/region_mean": 0.0009838054302235832, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 588.2098388671875, "completions/mean_terminated_length": 532.5306396484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 11.858559346748324, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 738782620.0, "reward": 0.5725446939468384, "reward_std": 0.23953697085380554, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1269 }, { "clip_ratio/high_max": 0.001498469925536483, "clip_ratio/high_mean": 0.00044745506534127344, "clip_ratio/low_mean": 0.0004049732694966224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008524283375663799, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 602.6975708007812, "completions/mean_terminated_length": 539.1829223632812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.867891513560805, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 739348637.0, "reward": 0.5770089626312256, "reward_std": 0.21538014709949493, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1270 }, { "clip_ratio/high_max": 0.0013045662499280297, "clip_ratio/high_mean": 0.0003715952520906285, "clip_ratio/low_mean": 0.0002883205274883949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006599157804885181, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 599.9955444335938, "completions/mean_terminated_length": 536.4318237304688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.877223680373287, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 739917097.0, "reward": 0.6037946939468384, "reward_std": 0.18438448011875153, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1271 }, { "clip_ratio/high_max": 0.001831248866437818, "clip_ratio/high_mean": 0.0005290896176575188, "clip_ratio/low_mean": 0.0003391159937109478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008682056013640249, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 567.4453125, "completions/mean_terminated_length": 543.6572875976562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 11.886555847185768, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 740496048.0, "reward": 0.5647321939468384, "reward_std": 0.19197219610214233, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606892466545105, "step": 1272 }, { "clip_ratio/high_max": 0.0017426248832634883, "clip_ratio/high_mean": 0.0005477522827277426, "clip_ratio/low_mean": 0.0002948331142533789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008425853984590503, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 639.9486694335938, "completions/mean_terminated_length": 573.1080322265625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.89588801399825, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 741088490.0, "reward": 0.566964328289032, "reward_std": 0.19956304132938385, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1273 }, { "clip_ratio/high_max": 0.0013651914941874566, "clip_ratio/high_mean": 0.0004101007530152856, "clip_ratio/low_mean": 0.0003461956030150759, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007562963492091512, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3307.0, "completions/mean_length": 638.1551513671875, "completions/mean_terminated_length": 603.0698852539062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.905220180810732, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 741712741.0, "reward": 0.5178571939468384, "reward_std": 0.19681887328624725, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1274 }, { "clip_ratio/high_max": 0.001752853102516383, "clip_ratio/high_mean": 0.0004983964163329802, "clip_ratio/low_mean": 0.00033527855714510224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008336749533555121, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 657.325927734375, "completions/mean_terminated_length": 606.6998901367188, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 11.914552347623214, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 742339633.0, "reward": 0.5267857313156128, "reward_std": 0.2151198536157608, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1275 }, { "clip_ratio/high_max": 0.0017284796349485987, "clip_ratio/high_mean": 0.0005174747725504858, "clip_ratio/low_mean": 0.0004058257002270693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009233004866473493, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3381.0, "completions/mean_length": 616.536865234375, "completions/mean_terminated_length": 577.2652587890625, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 11.923884514435695, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 742942770.0, "reward": 0.4977678656578064, "reward_std": 0.2377803474664688, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1276 }, { "clip_ratio/high_max": 0.001540876210128772, "clip_ratio/high_mean": 0.00041743938800209435, "clip_ratio/low_mean": 0.0003925449013877369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008099842816591263, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 594.8828125, "completions/mean_terminated_length": 563.3412475585938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 11.933216681248178, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.01, "num_tokens": 743533489.0, "reward": 0.5792410969734192, "reward_std": 0.21436099708080292, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1277 }, { "clip_ratio/high_max": 0.001457372645745636, "clip_ratio/high_mean": 0.00048528808770242904, "clip_ratio/low_mean": 0.0004092531180504011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008945412091634353, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 647.7600708007812, "completions/mean_terminated_length": 581.0704956054688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.942548848060659, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 744128266.0, "reward": 0.5770089626312256, "reward_std": 0.2130589634180069, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1278 }, { "clip_ratio/high_max": 0.0012613602284545777, "clip_ratio/high_mean": 0.00037037625725133694, "clip_ratio/low_mean": 0.000281501633935477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006518779018733767, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 628.4553833007812, "completions/mean_terminated_length": 573.4149780273438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 11.951881014873141, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 744715634.0, "reward": 0.5602678656578064, "reward_std": 0.18283648788928986, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1279 }, { "clip_ratio/high_max": 0.0018274919157192926, "clip_ratio/high_mean": 0.0005502146226490368, "clip_ratio/low_mean": 0.00035680173800756165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009070163514479646, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3529.0, "completions/mean_length": 551.4096069335938, "completions/mean_terminated_length": 515.4441528320312, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 11.961213181685622, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 745266617.0, "reward": 0.5837053656578064, "reward_std": 0.24633900821208954, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1280 }, { "clip_ratio/high_max": 0.0017539462169224862, "clip_ratio/high_mean": 0.0006478564841927437, "clip_ratio/low_mean": 0.00044828502723248675, "clip_ratio/low_min": 1.151861397374887e-05, "clip_ratio/region_mean": 0.0010961415009660413, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3247.0, "completions/mean_length": 575.724365234375, "completions/mean_terminated_length": 544.0101318359375, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 11.970545348498105, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 745841866.0, "reward": 0.5837053656578064, "reward_std": 0.25254228711128235, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 1281 }, { "clip_ratio/high_max": 0.0018160486351916916, "clip_ratio/high_mean": 0.000605407185275908, "clip_ratio/low_mean": 0.0003585709459912323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000963978141953703, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 601.1495971679688, "completions/mean_terminated_length": 569.6644287109375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 11.979877515310585, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 746441256.0, "reward": 0.559151828289032, "reward_std": 0.23684489727020264, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1282 }, { "clip_ratio/high_max": 0.001632143199458369, "clip_ratio/high_mean": 0.0004890562947821309, "clip_ratio/low_mean": 0.0003405987251881015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008296550331579056, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 654.8817138671875, "completions/mean_terminated_length": 576.3173217773438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.989209682123068, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 747039302.0, "reward": 0.546875, "reward_std": 0.2061375081539154, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1283 }, { "clip_ratio/high_max": 0.0014544960076818825, "clip_ratio/high_mean": 0.0004236216881281507, "clip_ratio/low_mean": 0.00033478742693660024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000758409119953285, "completions/clipped_ratio": 0.014204545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 2703.0, "completions/mean_length": 622.5767211914062, "completions/mean_terminated_length": 572.52734375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.998541848935549, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 747607454.0, "reward": 0.5915178656578064, "reward_std": 0.1876882165670395, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1284 }, { "clip_ratio/high_max": 0.002111249454173958, "clip_ratio/high_mean": 0.0005780745918855246, "clip_ratio/low_mean": 0.0002785197034427256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000856594292599766, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3269.0, "completions/mean_length": 585.9765625, "completions/mean_terminated_length": 522.157958984375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 12.009332166812483, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 748154449.0, "reward": 0.5379464626312256, "reward_std": 0.19122152030467987, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1285 }, { "clip_ratio/high_max": 0.0018276780647283886, "clip_ratio/high_mean": 0.0005416600556600315, "clip_ratio/low_mean": 0.0004022405148589314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009439005571039161, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 549.6217041015625, "completions/mean_terminated_length": 525.7135009765625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.018664333624963, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 748710414.0, "reward": 0.559151828289032, "reward_std": 0.2138826996088028, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1286 }, { "clip_ratio/high_max": 0.0016587318723395583, "clip_ratio/high_mean": 0.0004891278510967823, "clip_ratio/low_mean": 0.00039340282819466665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008825306913422537, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 567.2589721679688, "completions/mean_terminated_length": 543.4696655273438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 12.027996500437446, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 749285470.0, "reward": 0.582589328289032, "reward_std": 0.208912655711174, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1287 }, { "clip_ratio/high_max": 0.0017563361643624376, "clip_ratio/high_mean": 0.000560080104833105, "clip_ratio/low_mean": 0.00037574479779323156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009358249071738101, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 595.8671875, "completions/mean_terminated_length": 556.3623046875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.037328667249927, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 749866727.0, "reward": 0.6049107313156128, "reward_std": 0.2463369071483612, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1288 }, { "clip_ratio/high_max": 0.0015858357965043979, "clip_ratio/high_mean": 0.0004953791228672344, "clip_ratio/low_mean": 0.0003331395057557529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008285186449938919, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 642.193115234375, "completions/mean_terminated_length": 547.1341552734375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 12.04666083406241, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 750421764.0, "reward": 0.6194196939468384, "reward_std": 0.22713902592658997, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1289 }, { "clip_ratio/high_max": 0.0016000282575987512, "clip_ratio/high_mean": 0.0004811051303477143, "clip_ratio/low_mean": 0.00029532301141443895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007764281344861956, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 601.4788208007812, "completions/mean_terminated_length": 558.0440673828125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.05599300087489, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 751002225.0, "reward": 0.5625, "reward_std": 0.21350222826004028, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1290 }, { "clip_ratio/high_max": 0.0014871957937430125, "clip_ratio/high_mean": 0.00042845377947742236, "clip_ratio/low_mean": 0.0002470025140155485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006754562973583234, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 605.4420166015625, "completions/mean_terminated_length": 550.0363159179688, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 12.065325167687373, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 751577965.0, "reward": 0.5412946939468384, "reward_std": 0.18039585649967194, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1291 }, { "clip_ratio/high_max": 0.0017766085093171569, "clip_ratio/high_mean": 0.0006102052429923788, "clip_ratio/low_mean": 0.00036779468780423485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009779999272723217, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 617.357177734375, "completions/mean_terminated_length": 570.1357421875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 12.074657334499854, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 752176501.0, "reward": 0.5725446939468384, "reward_std": 0.23149938881397247, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1292 }, { "clip_ratio/high_max": 0.0015194485486063058, "clip_ratio/high_mean": 0.0004642161998162919, "clip_ratio/low_mean": 0.0003009372112501296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007651534215256106, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 644.2957763671875, "completions/mean_terminated_length": 601.3932495117188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 12.083989501312336, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 752798990.0, "reward": 0.5446428656578064, "reward_std": 0.19223181903362274, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1293 }, { "clip_ratio/high_max": 0.0015338030989369145, "clip_ratio/high_mean": 0.0004702502242253104, "clip_ratio/low_mean": 0.00039518083610801114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008654310668134713, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 608.8203125, "completions/mean_terminated_length": 561.4830322265625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 12.093321668124817, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 753389789.0, "reward": 0.5, "reward_std": 0.22748348116874695, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 1294 }, { "clip_ratio/high_max": 0.0016932627568166936, "clip_ratio/high_mean": 0.0005789731208096782, "clip_ratio/low_mean": 0.0003626756431458489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009416487591806799, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 647.4475708007812, "completions/mean_terminated_length": 608.5248413085938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 12.1026538349373, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 754021646.0, "reward": 0.5546875, "reward_std": 0.26287147402763367, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1295 }, { "clip_ratio/high_max": 0.002040789042439428, "clip_ratio/high_mean": 0.0006191360489538056, "clip_ratio/low_mean": 0.0003115602083880731, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009306962592745549, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3324.0, "completions/mean_length": 596.380615234375, "completions/mean_terminated_length": 544.8572998046875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 12.11198600174978, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 754585835.0, "reward": 0.5424107313156128, "reward_std": 0.2160520702600479, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1296 }, { "clip_ratio/high_max": 0.001216744318298879, "clip_ratio/high_mean": 0.00034977001041625044, "clip_ratio/low_mean": 0.0003800091785706172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007297791808014154, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 598.8895263671875, "completions/mean_terminated_length": 559.4187622070312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.121318168562263, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 755167584.0, "reward": 0.5379464626312256, "reward_std": 0.22683270275592804, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1297 }, { "clip_ratio/high_max": 0.001482685913288151, "clip_ratio/high_mean": 0.0004471595132145012, "clip_ratio/low_mean": 0.0003107851521235716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007579446782983723, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 651.341552734375, "completions/mean_terminated_length": 580.7221069335938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 12.130650335374744, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 755770994.0, "reward": 0.5212053656578064, "reward_std": 0.19340254366397858, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1298 }, { "clip_ratio/high_max": 0.0015878854601396597, "clip_ratio/high_mean": 0.00045524305164690304, "clip_ratio/low_mean": 0.0003832767840776796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008385198325413512, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 604.0859375, "completions/mean_terminated_length": 544.6322631835938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.139982502187227, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 756336135.0, "reward": 0.578125, "reward_std": 0.2112463265657425, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1299 }, { "clip_ratio/high_max": 0.0016629375404590974, "clip_ratio/high_mean": 0.0004791853848473693, "clip_ratio/low_mean": 0.0003064827004664039, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007856680845179653, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 649.060302734375, "completions/mean_terminated_length": 606.2169799804688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.149314668999708, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 756972629.0, "reward": 0.5078125, "reward_std": 0.1943693608045578, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1300 }, { "clip_ratio/high_max": 0.0019123325018881587, "clip_ratio/high_mean": 0.0005961817856814378, "clip_ratio/low_mean": 0.00030001878246821434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008962005608736945, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 540.4152221679688, "completions/mean_terminated_length": 512.41845703125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 12.15864683581219, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 757512697.0, "reward": 0.6283482313156128, "reward_std": 0.20339517295360565, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1301 }, { "clip_ratio/high_max": 0.0014530583339364966, "clip_ratio/high_mean": 0.0004249860510299186, "clip_ratio/low_mean": 0.00036164326274956693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007866293171900907, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 637.5736694335938, "completions/mean_terminated_length": 574.6931762695312, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 12.167979002624673, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.009, "num_tokens": 758108867.0, "reward": 0.5424107313156128, "reward_std": 0.2099343240261078, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1302 }, { "clip_ratio/high_max": 0.00189531694559264, "clip_ratio/high_mean": 0.0006254707104744739, "clip_ratio/low_mean": 0.0002795308180338907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009050015250977594, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 591.786865234375, "completions/mean_terminated_length": 544.2183227539062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 12.177311169437154, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 758672668.0, "reward": 0.6049107313156128, "reward_std": 0.21684511005878448, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1303 }, { "clip_ratio/high_max": 0.0017920883110491559, "clip_ratio/high_mean": 0.0005238584753897157, "clip_ratio/low_mean": 0.0002807907675332899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008046492453104293, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3315.0, "completions/mean_length": 596.279052734375, "completions/mean_terminated_length": 528.5938110351562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.186643336249636, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 759216982.0, "reward": 0.6116071939468384, "reward_std": 0.1867866963148117, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1304 }, { "clip_ratio/high_max": 0.001743438082485227, "clip_ratio/high_mean": 0.00048671018521417864, "clip_ratio/low_mean": 0.0002289760147959896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007156861984185525, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 597.5379638671875, "completions/mean_terminated_length": 577.90576171875, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 12.195975503062117, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 759822592.0, "reward": 0.5792410969734192, "reward_std": 0.17652486264705658, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1305 }, { "clip_ratio/high_max": 0.0018792225855577271, "clip_ratio/high_mean": 0.0005498812547557463, "clip_ratio/low_mean": 0.0003889644242462964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009388456892338581, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 607.3058471679688, "completions/mean_terminated_length": 551.9296875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 12.2053076698746, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 760395762.0, "reward": 0.5290178656578064, "reward_std": 0.22710372507572174, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1306 }, { "clip_ratio/high_max": 0.001504525847849436, "clip_ratio/high_mean": 0.0004240454372848035, "clip_ratio/low_mean": 0.0002640022747755211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006880477149024955, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 621.7600708007812, "completions/mean_terminated_length": 574.5984497070312, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.21463983668708, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 760992355.0, "reward": 0.566964328289032, "reward_std": 0.2045213133096695, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1307 }, { "clip_ratio/high_max": 0.0015156005474636913, "clip_ratio/high_mean": 0.00044573200568720495, "clip_ratio/low_mean": 0.00032972581243484456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007754578136882628, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 675.0022583007812, "completions/mean_terminated_length": 588.89013671875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 12.223972003499563, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 761592933.0, "reward": 0.543526828289032, "reward_std": 0.19227346777915955, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1308 }, { "clip_ratio/high_max": 0.0014195018447935581, "clip_ratio/high_mean": 0.0003841636016659322, "clip_ratio/low_mean": 0.00029450061424540763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006786642152292188, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3192.0, "completions/mean_length": 670.6998291015625, "completions/mean_terminated_length": 564.2750244140625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 12.233304170312044, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 762170096.0, "reward": 0.5167410969734192, "reward_std": 0.18850985169410706, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1309 }, { "clip_ratio/high_max": 0.0015460189688383252, "clip_ratio/high_mean": 0.00041675835473142797, "clip_ratio/low_mean": 0.0002649051477874309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006816635018367379, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 591.8449096679688, "completions/mean_terminated_length": 548.2904052734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 12.242636337124527, "grad_norm": 0.10986328125, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 762742605.0, "reward": 0.5691964626312256, "reward_std": 0.17934276163578033, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1310 }, { "clip_ratio/high_max": 0.0015403651232190896, "clip_ratio/high_mean": 0.00041163716650771676, "clip_ratio/low_mean": 0.0003618612611262506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007734984287708357, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3269.0, "completions/mean_length": 596.2902221679688, "completions/mean_terminated_length": 536.7037963867188, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.251968503937007, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 763295785.0, "reward": 0.5870535969734192, "reward_std": 0.20305564999580383, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1311 }, { "clip_ratio/high_max": 0.0014359627748490311, "clip_ratio/high_mean": 0.00041973795032390626, "clip_ratio/low_mean": 0.0003717834165399836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007915213773230789, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 627.7835083007812, "completions/mean_terminated_length": 572.732421875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 12.26130067074949, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 763893279.0, "reward": 0.5345982313156128, "reward_std": 0.20366504788398743, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1312 }, { "clip_ratio/high_max": 0.0015365403196483385, "clip_ratio/high_mean": 0.0004191620234905713, "clip_ratio/low_mean": 0.00035687019101260375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007760322178000933, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 651.7444458007812, "completions/mean_terminated_length": 597.07373046875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.27063283756197, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 764505522.0, "reward": 0.5602678656578064, "reward_std": 0.1982831358909607, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1313 }, { "clip_ratio/high_max": 0.001468470062718552, "clip_ratio/high_mean": 0.0004182748273251491, "clip_ratio/low_mean": 0.0003367118406458758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007549866759291035, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3434.0, "completions/mean_length": 565.216552734375, "completions/mean_terminated_length": 533.4076538085938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 12.279965004374453, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0198, "num_tokens": 765070220.0, "reward": 0.5859375, "reward_std": 0.2056485265493393, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1314 }, { "clip_ratio/high_max": 0.0018188020530942595, "clip_ratio/high_mean": 0.0005683027397935803, "clip_ratio/low_mean": 0.00036719275317409483, "clip_ratio/low_min": 1.0539629329286981e-05, "clip_ratio/region_mean": 0.0009354955009257537, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2320.0, "completions/mean_length": 541.7991333007812, "completions/mean_terminated_length": 505.7361755371094, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 12.289297171186934, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 765599024.0, "reward": 0.625, "reward_std": 0.21225804090499878, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1315 }, { "clip_ratio/high_max": 0.0017290630557909026, "clip_ratio/high_mean": 0.0005711032329145382, "clip_ratio/low_mean": 0.0003347759829921415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009058792084033485, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 598.5971069335938, "completions/mean_terminated_length": 555.1265869140625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 12.298629337999417, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 766182983.0, "reward": 0.535714328289032, "reward_std": 0.2175600677728653, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1316 }, { "clip_ratio/high_max": 0.001851984688983066, "clip_ratio/high_mean": 0.0005933641227784392, "clip_ratio/low_mean": 0.00034520033500484715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00093856447347207, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3424.0, "completions/mean_length": 598.552490234375, "completions/mean_terminated_length": 559.077880859375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.307961504811898, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 766765470.0, "reward": 0.6104910969734192, "reward_std": 0.20929424464702606, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 1317 }, { "clip_ratio/high_max": 0.0013350010449357796, "clip_ratio/high_mean": 0.0002992231101188736, "clip_ratio/low_mean": 0.00033686329379634117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006360864131238486, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 677.078125, "completions/mean_terminated_length": 595.0239868164062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.31729367162438, "grad_norm": 0.10595703125, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 767385668.0, "reward": 0.520089328289032, "reward_std": 0.16608785092830658, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1318 }, { "clip_ratio/high_max": 0.0017508927767266869, "clip_ratio/high_mean": 0.00045783716450387146, "clip_ratio/low_mean": 0.00028557491077663144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007434120889229234, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 575.4230346679688, "completions/mean_terminated_length": 523.5911254882812, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 12.326625838436861, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 767931287.0, "reward": 0.5792410969734192, "reward_std": 0.18701490759849548, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1319 }, { "clip_ratio/high_max": 0.0017550511274748715, "clip_ratio/high_mean": 0.0005480323541178223, "clip_ratio/low_mean": 0.0002626408290780091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008106731884254259, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 669.03125, "completions/mean_terminated_length": 590.7899169921875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 12.335958005249344, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0107, "num_tokens": 768535219.0, "reward": 0.5658482313156128, "reward_std": 0.2036636620759964, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1320 }, { "clip_ratio/high_max": 0.0014951197581467568, "clip_ratio/high_mean": 0.0004573226940465247, "clip_ratio/low_mean": 0.0003901700511050876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008474927590214065, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 647.1027221679688, "completions/mean_terminated_length": 584.3954467773438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 12.345290172061826, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 769134111.0, "reward": 0.5066964626312256, "reward_std": 0.1962229609489441, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1321 }, { "clip_ratio/high_max": 0.0010565303255134495, "clip_ratio/high_mean": 0.0002735761662506775, "clip_ratio/low_mean": 0.00023922488844618783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005128010548105522, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 605.0569458007812, "completions/mean_terminated_length": 553.661376953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.354622338874307, "grad_norm": 0.10498046875, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 769709802.0, "reward": 0.5725446939468384, "reward_std": 0.1291857659816742, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1322 }, { "clip_ratio/high_max": 0.001643934969251859, "clip_ratio/high_mean": 0.0005014874755033816, "clip_ratio/low_mean": 0.000300719930919513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008022074189284467, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1879.0, "completions/mean_length": 608.5267944335938, "completions/mean_terminated_length": 549.1487426757812, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 12.36395450568679, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 770280882.0, "reward": 0.5714285969734192, "reward_std": 0.200238436460495, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 1323 }, { "clip_ratio/high_max": 0.0016534529640921392, "clip_ratio/high_mean": 0.00048265144096149015, "clip_ratio/low_mean": 0.0004150785985075345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008977300421975087, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3974.0, "completions/mean_length": 650.7533569335938, "completions/mean_terminated_length": 588.1124877929688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.37328667249927, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 770894517.0, "reward": 0.455357164144516, "reward_std": 0.22575047612190247, "rewards/verify_math_reward/mean": 0.4553571343421936, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1324 }, { "clip_ratio/high_max": 0.001632866355976148, "clip_ratio/high_mean": 0.0005132672069976252, "clip_ratio/low_mean": 0.00030067291129398654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008139401115840883, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3435.0, "completions/mean_length": 663.083740234375, "completions/mean_terminated_length": 604.634521484375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.382618839311753, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 771512992.0, "reward": 0.5167410969734192, "reward_std": 0.21064084768295288, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1325 }, { "clip_ratio/high_max": 0.0015619659827734722, "clip_ratio/high_mean": 0.0004668235363283202, "clip_ratio/low_mean": 0.0002761273143505605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007429508582390554, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 706.9688110351562, "completions/mean_terminated_length": 621.6613159179688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 12.391951006124234, "grad_norm": 0.11572265625, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 772149284.0, "reward": 0.4441964626312256, "reward_std": 0.19580857455730438, "rewards/verify_math_reward/mean": 0.4441964328289032, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 1326 }, { "clip_ratio/high_max": 0.0016889770995476283, "clip_ratio/high_mean": 0.00048299741774826543, "clip_ratio/low_mean": 0.00036200213139636617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000844999559376447, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 605.1038208007812, "completions/mean_terminated_length": 545.6674194335938, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 12.401283172936717, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 772708641.0, "reward": 0.5758928656578064, "reward_std": 0.20741882920265198, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1327 }, { "clip_ratio/high_max": 0.0019424109823376057, "clip_ratio/high_mean": 0.0006247407210366873, "clip_ratio/low_mean": 0.0004140146727422689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010387554048065795, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2134.0, "completions/mean_length": 627.3158569335938, "completions/mean_terminated_length": 568.2576904296875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 12.410615339749198, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 773298468.0, "reward": 0.5301339626312256, "reward_std": 0.2515665888786316, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 1328 }, { "clip_ratio/high_max": 0.0016957903771981364, "clip_ratio/high_mean": 0.0005747592945226643, "clip_ratio/low_mean": 0.00045203657191450475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010267958823533263, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1939.0, "completions/mean_length": 576.208740234375, "completions/mean_terminated_length": 520.3389892578125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 12.41994750656168, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0319, "num_tokens": 773847591.0, "reward": 0.5970982313156128, "reward_std": 0.23916973173618317, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 1329 }, { "clip_ratio/high_max": 0.0018559801901574247, "clip_ratio/high_mean": 0.0005546434127836619, "clip_ratio/low_mean": 0.0003098287588727544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008644721729069715, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 586.9017944335938, "completions/mean_terminated_length": 559.2711181640625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 12.429279673374161, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 774445887.0, "reward": 0.5479910969734192, "reward_std": 0.1940765529870987, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1330 }, { "clip_ratio/high_max": 0.0017397524388798047, "clip_ratio/high_mean": 0.0005094935975193948, "clip_ratio/low_mean": 0.0003674017425510101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008768953507569677, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 584.7377319335938, "completions/mean_terminated_length": 545.1072387695312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.438611840186644, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 775016404.0, "reward": 0.598214328289032, "reward_std": 0.21775297820568085, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 1331 }, { "clip_ratio/high_max": 0.0017374656772517483, "clip_ratio/high_mean": 0.0005541824407373497, "clip_ratio/low_mean": 0.00036899929591527325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009231817352883809, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 568.6060791015625, "completions/mean_terminated_length": 528.79345703125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 12.447944006999125, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0161, "num_tokens": 775570939.0, "reward": 0.6026785969734192, "reward_std": 0.22567518055438995, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1332 }, { "clip_ratio/high_max": 0.0012483817890824866, "clip_ratio/high_mean": 0.0003135813240078278, "clip_ratio/low_mean": 0.000292292923631976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006058742455934407, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 644.6796875, "completions/mean_terminated_length": 581.9284057617188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 12.457276173811607, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.0201, "num_tokens": 776184036.0, "reward": 0.527901828289032, "reward_std": 0.17505809664726257, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949967861175537, "step": 1333 }, { "clip_ratio/high_max": 0.0018242450987600023, "clip_ratio/high_mean": 0.0005425435836059478, "clip_ratio/low_mean": 0.00029184862955844437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008343922236235812, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 622.1373291015625, "completions/mean_terminated_length": 570.9931640625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.466608340624088, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 776780287.0, "reward": 0.5524553656578064, "reward_std": 0.21722166240215302, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1334 }, { "clip_ratio/high_max": 0.0018869915465984377, "clip_ratio/high_mean": 0.000658529456359247, "clip_ratio/low_mean": 0.00034590689188007673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010044363489214447, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 560.786865234375, "completions/mean_terminated_length": 524.9165649414062, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 12.47594050743657, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 777326504.0, "reward": 0.6227678656578064, "reward_std": 0.21989238262176514, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1335 }, { "clip_ratio/high_max": 0.0016766611024650047, "clip_ratio/high_mean": 0.0005130787046709884, "clip_ratio/low_mean": 0.0003538968317116087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008669755347909813, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 583.9163208007812, "completions/mean_terminated_length": 548.2807006835938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 12.485272674249051, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 777907597.0, "reward": 0.5569196939468384, "reward_std": 0.22198784351348877, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1336 }, { "clip_ratio/high_max": 0.0017860114867289667, "clip_ratio/high_mean": 0.0005190521185340913, "clip_ratio/low_mean": 0.00031431715660801274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008333692812811933, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 600.2355346679688, "completions/mean_terminated_length": 536.6761474609375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 12.494604841061534, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 778472224.0, "reward": 0.5915178656578064, "reward_std": 0.20478203892707825, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1337 }, { "clip_ratio/high_max": 0.0014998347751316032, "clip_ratio/high_mean": 0.00045739044674064644, "clip_ratio/low_mean": 0.00024417975680535164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007015701978616562, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 614.5123291015625, "completions/mean_terminated_length": 571.2395629882812, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.503937007874015, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 779067547.0, "reward": 0.5524553656578064, "reward_std": 0.1862974464893341, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1338 }, { "clip_ratio/high_max": 0.0016813044057926163, "clip_ratio/high_mean": 0.00047154157800832763, "clip_ratio/low_mean": 0.0002729561776959599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007444977500199457, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 668.7589721679688, "completions/mean_terminated_length": 606.4454345703125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 12.513269174686497, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 779702187.0, "reward": 0.5111607313156128, "reward_std": 0.19411791861057281, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1339 }, { "clip_ratio/high_max": 0.0016666199371684343, "clip_ratio/high_mean": 0.0004731992985398392, "clip_ratio/low_mean": 0.00031938857523527986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007925878726382507, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 649.583740234375, "completions/mean_terminated_length": 590.9046630859375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 12.52260134149898, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 780312742.0, "reward": 0.559151828289032, "reward_std": 0.22195462882518768, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1340 }, { "clip_ratio/high_max": 0.0019046869902012986, "clip_ratio/high_mean": 0.0005148489103703469, "clip_ratio/low_mean": 0.00033319990075142414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008480487922497559, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2395.0, "completions/mean_length": 645.7879638671875, "completions/mean_terminated_length": 583.0568237304688, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 12.531933508311461, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 780909128.0, "reward": 0.5558035969734192, "reward_std": 0.20162348449230194, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1341 }, { "clip_ratio/high_max": 0.0018353088380536065, "clip_ratio/high_mean": 0.0006077824446037994, "clip_ratio/low_mean": 0.0003275464979424214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009353289542559651, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 619.8694458007812, "completions/mean_terminated_length": 580.6354370117188, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 12.541265675123944, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 781512203.0, "reward": 0.5345982313156128, "reward_std": 0.22485990822315216, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1342 }, { "clip_ratio/high_max": 0.0015425090605276637, "clip_ratio/high_mean": 0.0005186793109714927, "clip_ratio/low_mean": 0.00037904443502156937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000897723743946699, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 638.1194458007812, "completions/mean_terminated_length": 575.2488403320312, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 12.550597841936424, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 782100870.0, "reward": 0.535714328289032, "reward_std": 0.2338722199201584, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1343 }, { "clip_ratio/high_max": 0.002033382579611498, "clip_ratio/high_mean": 0.0005803411145279824, "clip_ratio/low_mean": 0.00043659423477038217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010169353408855386, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 594.0625, "completions/mean_terminated_length": 526.33447265625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 12.559930008748907, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 782643982.0, "reward": 0.5915178656578064, "reward_std": 0.23064091801643372, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1344 }, { "clip_ratio/high_max": 0.0017592068570593256, "clip_ratio/high_mean": 0.0005086708604267187, "clip_ratio/low_mean": 0.00036662707270807005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008752979128985316, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 620.708740234375, "completions/mean_terminated_length": 573.5328369140625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.569262175561388, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 783244393.0, "reward": 0.5267857313156128, "reward_std": 0.1980655938386917, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1345 }, { "clip_ratio/high_max": 0.0017595783992874203, "clip_ratio/high_mean": 0.000571181706391144, "clip_ratio/low_mean": 0.00035188769788874197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009230693999597861, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 585.169677734375, "completions/mean_terminated_length": 529.4421997070312, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 12.57859434237387, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0239, "num_tokens": 783805673.0, "reward": 0.6004464626312256, "reward_std": 0.20636393129825592, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1346 }, { "clip_ratio/high_max": 0.0018661543399502989, "clip_ratio/high_mean": 0.0005619721421226131, "clip_ratio/low_mean": 0.0003518954197261337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009138675513895578, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 618.2890625, "completions/mean_terminated_length": 542.9452514648438, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 12.587926509186351, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 784372164.0, "reward": 0.5892857313156128, "reward_std": 0.2003892958164215, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1347 }, { "clip_ratio/high_max": 0.001620668293980998, "clip_ratio/high_mean": 0.0005225414563483355, "clip_ratio/low_mean": 0.000327016052210638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008495574954849872, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 590.1239013671875, "completions/mean_terminated_length": 530.4324951171875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.597258675998834, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 784927435.0, "reward": 0.6026785969734192, "reward_std": 0.21184365451335907, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1348 }, { "clip_ratio/high_max": 0.0013208903519625892, "clip_ratio/high_mean": 0.00040145785442291526, "clip_ratio/low_mean": 0.00038392885812754685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007853867066387465, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3601.0, "completions/mean_length": 650.529052734375, "completions/mean_terminated_length": 591.8660888671875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.606590842811315, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0141, "num_tokens": 785546997.0, "reward": 0.494419664144516, "reward_std": 0.19170935451984406, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1349 }, { "clip_ratio/high_max": 0.0013343321516003925, "clip_ratio/high_mean": 0.0003621808348270861, "clip_ratio/low_mean": 0.00033473987821253104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006969207147449197, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 586.4308471679688, "completions/mean_terminated_length": 526.676513671875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 12.615923009623797, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 786104367.0, "reward": 0.535714328289032, "reward_std": 0.1892675906419754, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1350 }, { "clip_ratio/high_max": 0.001797109693143284, "clip_ratio/high_mean": 0.0005520653926396335, "clip_ratio/low_mean": 0.00040413533145056135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009562007217027713, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 617.1864013671875, "completions/mean_terminated_length": 565.9694213867188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.625255176436278, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 786693614.0, "reward": 0.5390625, "reward_std": 0.25088509917259216, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1351 }, { "clip_ratio/high_max": 0.0013580147542597842, "clip_ratio/high_mean": 0.0004347970145772706, "clip_ratio/low_mean": 0.0003087151912950503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007435122161041363, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 642.2756958007812, "completions/mean_terminated_length": 579.4806518554688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.63458734324876, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 787291469.0, "reward": 0.5189732313156128, "reward_std": 0.20336057245731354, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1352 }, { "clip_ratio/high_max": 0.0015516348994424334, "clip_ratio/high_mean": 0.0004743716763186967, "clip_ratio/low_mean": 0.0003102460071886526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007846176763450785, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 634.2667846679688, "completions/mean_terminated_length": 551.1851196289062, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 12.643919510061242, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 787867788.0, "reward": 0.559151828289032, "reward_std": 0.20474812388420105, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1353 }, { "clip_ratio/high_max": 0.002083352690533502, "clip_ratio/high_mean": 0.0006984349906815623, "clip_ratio/low_mean": 0.000275083659403208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009735186304169474, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 526.3471069335938, "completions/mean_terminated_length": 486.0575866699219, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 12.653251676873724, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 788375771.0, "reward": 0.6328125, "reward_std": 0.20267769694328308, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1354 }, { "clip_ratio/high_max": 0.0016434017079518526, "clip_ratio/high_mean": 0.0005675661604982452, "clip_ratio/low_mean": 0.00045137354595681245, "clip_ratio/low_min": 1.3174536434235051e-05, "clip_ratio/region_mean": 0.0010189397307840409, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 620.0625, "completions/mean_terminated_length": 568.8878784179688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 12.662583843686207, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 788971259.0, "reward": 0.5691964626312256, "reward_std": 0.2634360194206238, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1355 }, { "clip_ratio/high_max": 0.0014816271814197535, "clip_ratio/high_mean": 0.0004082731221615177, "clip_ratio/low_mean": 0.0003605608969792229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007688340228924062, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 647.2801513671875, "completions/mean_terminated_length": 576.5774536132812, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 12.671916010498688, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0105, "num_tokens": 789579166.0, "reward": 0.4921875298023224, "reward_std": 0.20080046355724335, "rewards/verify_math_reward/mean": 0.4921875, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1356 }, { "clip_ratio/high_max": 0.0015836158358979446, "clip_ratio/high_mean": 0.00048607764722419233, "clip_ratio/low_mean": 0.0003926212223177572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008786988810243201, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3176.0, "completions/mean_length": 638.0614013671875, "completions/mean_terminated_length": 579.1861572265625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.68124817731117, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 790176213.0, "reward": 0.5412946939468384, "reward_std": 0.20508696138858795, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1357 }, { "clip_ratio/high_max": 0.0015654539438401116, "clip_ratio/high_mean": 0.0004823506724278559, "clip_ratio/low_mean": 0.0002884349729583846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007707856429988169, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2358.0, "completions/mean_length": 556.9765625, "completions/mean_terminated_length": 517.03271484375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 12.690580344123651, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": -0.0059, "num_tokens": 790720536.0, "reward": 0.6183035969734192, "reward_std": 0.18889032304286957, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1358 }, { "clip_ratio/high_max": 0.0018910308717750013, "clip_ratio/high_mean": 0.0006210268591075874, "clip_ratio/low_mean": 0.0004180689832082862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010390958459538524, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2288.0, "completions/mean_length": 572.1317138671875, "completions/mean_terminated_length": 528.3322143554688, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 12.699912510936134, "grad_norm": 0.154296875, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 791278166.0, "reward": 0.613839328289032, "reward_std": 0.25539591908454895, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1359 }, { "clip_ratio/high_max": 0.001492702578616445, "clip_ratio/high_mean": 0.00044245443530144257, "clip_ratio/low_mean": 0.0003830527193713351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008255071525127278, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 619.552490234375, "completions/mean_terminated_length": 560.3621215820312, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 12.709244677748615, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 791855125.0, "reward": 0.5234375, "reward_std": 0.2184620350599289, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1360 }, { "clip_ratio/high_max": 0.0016415607551607536, "clip_ratio/high_mean": 0.00048394609439128544, "clip_ratio/low_mean": 0.00030661542632515193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00079056152026169, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 597.318115234375, "completions/mean_terminated_length": 545.80859375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 12.718576844561097, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 792418370.0, "reward": 0.5334821939468384, "reward_std": 0.21673493087291718, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1361 }, { "clip_ratio/high_max": 0.0012727250514217303, "clip_ratio/high_mean": 0.0003514046095460799, "clip_ratio/low_mean": 0.0003181941166303659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006695987376588164, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 651.5770263671875, "completions/mean_terminated_length": 604.8201904296875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.727909011373578, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 793044295.0, "reward": 0.4676339626312256, "reward_std": 0.18239323794841766, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1362 }, { "clip_ratio/high_max": 0.001995025560972863, "clip_ratio/high_mean": 0.000556725545379777, "clip_ratio/low_mean": 0.00031709733048046473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008738228671063553, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 674.005615234375, "completions/mean_terminated_length": 587.868408203125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 12.73724117818606, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 793651412.0, "reward": 0.515625, "reward_std": 0.21899224817752838, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1363 }, { "clip_ratio/high_max": 0.0013632476238853997, "clip_ratio/high_mean": 0.0004527963271812041, "clip_ratio/low_mean": 0.00034688679193095595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007996831172931707, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 600.3683471679688, "completions/mean_terminated_length": 564.899658203125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.746573344998541, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 794253750.0, "reward": 0.546875, "reward_std": 0.2136916220188141, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1364 }, { "clip_ratio/high_max": 0.001735398400342092, "clip_ratio/high_mean": 0.00045994657079972967, "clip_ratio/low_mean": 0.00034610448278726835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008060510581344715, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 566.786865234375, "completions/mean_terminated_length": 534.9921264648438, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 12.755905511811024, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0123, "num_tokens": 794808903.0, "reward": 0.5714285969734192, "reward_std": 0.2222556322813034, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1365 }, { "clip_ratio/high_max": 0.0014769204062758945, "clip_ratio/high_mean": 0.0003818334614607011, "clip_ratio/low_mean": 0.0002928974295173248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006747308880221681, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 668.0178833007812, "completions/mean_terminated_length": 581.72998046875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 12.765237678623505, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0149, "num_tokens": 795407791.0, "reward": 0.515625, "reward_std": 0.1898314207792282, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1366 }, { "clip_ratio/high_max": 0.0018882580034187413, "clip_ratio/high_mean": 0.0005480299162172741, "clip_ratio/low_mean": 0.00035163842062502226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000899668327292602, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 599.9944458007812, "completions/mean_terminated_length": 556.541259765625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.774569845435988, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 795986418.0, "reward": 0.5379464626312256, "reward_std": 0.21993333101272583, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1367 }, { "clip_ratio/high_max": 0.0015762244338475284, "clip_ratio/high_mean": 0.000493550815576782, "clip_ratio/low_mean": 0.0002903348273548545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007838856463422417, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 601.7890625, "completions/mean_terminated_length": 538.2579345703125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 12.783902012248468, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 796546229.0, "reward": 0.5859375, "reward_std": 0.17473429441452026, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1368 }, { "clip_ratio/high_max": 0.0018076913638651604, "clip_ratio/high_mean": 0.0005719085979762895, "clip_ratio/low_mean": 0.00035960120885647484, "clip_ratio/low_min": 1.1192693818884436e-05, "clip_ratio/region_mean": 0.0009315098195656901, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3877.0, "completions/mean_length": 612.7098388671875, "completions/mean_terminated_length": 569.4146728515625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.793234179060951, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0135, "num_tokens": 797151969.0, "reward": 0.566964328289032, "reward_std": 0.24408239126205444, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1369 }, { "clip_ratio/high_max": 0.0018303513852515607, "clip_ratio/high_mean": 0.0005259707338609587, "clip_ratio/low_mean": 0.0003362928214301064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008622635577921756, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 626.7924194335938, "completions/mean_terminated_length": 555.6697387695312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 12.802566345873432, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 797730607.0, "reward": 0.5680803656578064, "reward_std": 0.2282680869102478, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1370 }, { "clip_ratio/high_max": 0.0014975834533288435, "clip_ratio/high_mean": 0.00044420376389098237, "clip_ratio/low_mean": 0.00037671682684958796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000820920591650065, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 641.2567138671875, "completions/mean_terminated_length": 574.44140625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 12.811898512685914, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.012, "num_tokens": 798338549.0, "reward": 0.4933035969734192, "reward_std": 0.19227276742458344, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1371 }, { "clip_ratio/high_max": 0.001852706509453128, "clip_ratio/high_mean": 0.0005725582295781351, "clip_ratio/low_mean": 0.00041772364602365997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009902818865157315, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 672.1942138671875, "completions/mean_terminated_length": 561.7488403320312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.821230679498395, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 798919547.0, "reward": 0.5033482313156128, "reward_std": 0.22038161754608154, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1372 }, { "clip_ratio/high_max": 0.0016795955361885717, "clip_ratio/high_mean": 0.0005697418389445374, "clip_ratio/low_mean": 0.0003776335677230236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009473754089412978, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1969.0, "completions/mean_length": 624.3092041015625, "completions/mean_terminated_length": 569.2029418945312, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 12.830562846310878, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0172, "num_tokens": 799518296.0, "reward": 0.5691964626312256, "reward_std": 0.21936173737049103, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 1373 }, { "clip_ratio/high_max": 0.0015312329906009836, "clip_ratio/high_mean": 0.00047515602091152687, "clip_ratio/low_mean": 0.00036321660013527435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008383726217289222, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 603.40625, "completions/mean_terminated_length": 551.9863891601562, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 12.83989501312336, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 800092716.0, "reward": 0.5926339626312256, "reward_std": 0.19892321527004242, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 1374 }, { "clip_ratio/high_max": 0.0018305032135685906, "clip_ratio/high_mean": 0.0005841719332693174, "clip_ratio/low_mean": 0.00022793832374645717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008121102459881513, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 569.2756958007812, "completions/mean_terminated_length": 521.401611328125, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 12.849227179935841, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 800645619.0, "reward": 0.6104910969734192, "reward_std": 0.20110353827476501, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1375 }, { "clip_ratio/high_max": 0.0018039065389530151, "clip_ratio/high_mean": 0.0005342165708270841, "clip_ratio/low_mean": 0.00036941456937711337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009036311448653578, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2808.0, "completions/mean_length": 594.372802734375, "completions/mean_terminated_length": 562.8265991210938, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 12.858559346748324, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 801233449.0, "reward": 0.5066964626312256, "reward_std": 0.2362728863954544, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1376 }, { "clip_ratio/high_max": 0.00152376087862649, "clip_ratio/high_mean": 0.000517280071562709, "clip_ratio/low_mean": 0.0004199211927016222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009372012737003388, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 638.482177734375, "completions/mean_terminated_length": 579.6140747070312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 12.867891513560805, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 801829073.0, "reward": 0.535714328289032, "reward_std": 0.22425049543380737, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1377 }, { "clip_ratio/high_max": 0.0013722723051614594, "clip_ratio/high_mean": 0.000370172837847349, "clip_ratio/low_mean": 0.0003418285873522109, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007120014261090546, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3839.0, "completions/mean_length": 688.7857666015625, "completions/mean_terminated_length": 610.9954223632812, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 12.877223680373287, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 802467361.0, "reward": 0.4799107313156128, "reward_std": 0.19035457074642181, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998752772808075, "step": 1378 }, { "clip_ratio/high_max": 0.0017724383887980366, "clip_ratio/high_mean": 0.0005452787702324713, "clip_ratio/low_mean": 0.00040946645935946435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009547452236802201, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 598.6451416015625, "completions/mean_terminated_length": 563.158935546875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 12.886555847185768, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 803063875.0, "reward": 0.5546875, "reward_std": 0.2230491042137146, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1379 }, { "clip_ratio/high_max": 0.0019459667928458657, "clip_ratio/high_mean": 0.0006290923111009761, "clip_ratio/low_mean": 0.00042494892102240556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010540412358750473, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 636.7533569335938, "completions/mean_terminated_length": 589.7952880859375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.89588801399825, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 803671742.0, "reward": 0.5234375, "reward_std": 0.24822258949279785, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1380 }, { "clip_ratio/high_max": 0.001682820825408271, "clip_ratio/high_mean": 0.0005148701404777967, "clip_ratio/low_mean": 0.0002758318174755914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000790701953064854, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3577.0, "completions/mean_length": 646.4765625, "completions/mean_terminated_length": 595.6907958984375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.905220180810732, "grad_norm": 0.111328125, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 804295249.0, "reward": 0.5290178656578064, "reward_std": 0.20662352442741394, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1381 }, { "clip_ratio/high_max": 0.0016614306960036629, "clip_ratio/high_mean": 0.0005450017248449512, "clip_ratio/low_mean": 0.0003277860500929819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008727877766432357, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 559.6350708007812, "completions/mean_terminated_length": 535.7943725585938, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 12.914552347623214, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 804852650.0, "reward": 0.5892857313156128, "reward_std": 0.20760175585746765, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1382 }, { "clip_ratio/high_max": 0.0017409946558473166, "clip_ratio/high_mean": 0.0005558953894251317, "clip_ratio/low_mean": 0.00034601925892729923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009019146436912706, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 577.6361694335938, "completions/mean_terminated_length": 529.8756103515625, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 12.923884514435695, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 805418948.0, "reward": 0.6305803656578064, "reward_std": 0.21684300899505615, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1383 }, { "clip_ratio/high_max": 0.001836070126046252, "clip_ratio/high_mean": 0.0005716071525512234, "clip_ratio/low_mean": 0.00031157654245816957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008831836926219694, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 606.5234375, "completions/mean_terminated_length": 559.155029296875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.933216681248178, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 806005665.0, "reward": 0.5558035969734192, "reward_std": 0.20342515408992767, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1384 }, { "clip_ratio/high_max": 0.0016403847803303506, "clip_ratio/high_mean": 0.0005073323721944689, "clip_ratio/low_mean": 0.00043890280744562915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009462351745241904, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3239.0, "completions/mean_length": 646.333740234375, "completions/mean_terminated_length": 563.5416870117188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.942548848060659, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 806587972.0, "reward": 0.566964328289032, "reward_std": 0.23066525161266327, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1385 }, { "clip_ratio/high_max": 0.001550680462969467, "clip_ratio/high_mean": 0.0004051927962791524, "clip_ratio/low_mean": 0.0003070536570248805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007122464498934278, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3795.0, "completions/mean_length": 615.6495971679688, "completions/mean_terminated_length": 564.4099731445312, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 12.951881014873141, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 807172562.0, "reward": 0.5245535969734192, "reward_std": 0.1925002932548523, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1386 }, { "clip_ratio/high_max": 0.0013207304273237241, "clip_ratio/high_mean": 0.00037152499521653226, "clip_ratio/low_mean": 0.0002873082819405681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006588332821593212, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 544.3917846679688, "completions/mean_terminated_length": 520.4483032226562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.961213181685622, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 807732345.0, "reward": 0.5524553656578064, "reward_std": 0.18080630898475647, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1387 }, { "clip_ratio/high_max": 0.001588572553373524, "clip_ratio/high_mean": 0.00046897146330593387, "clip_ratio/low_mean": 0.0003560639931947662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008250354630945367, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3299.0, "completions/mean_length": 593.8895263671875, "completions/mean_terminated_length": 542.3295288085938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 12.970545348498105, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 808298438.0, "reward": 0.520089328289032, "reward_std": 0.20482411980628967, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1388 }, { "clip_ratio/high_max": 0.00182184676759789, "clip_ratio/high_mean": 0.0005334302263690915, "clip_ratio/low_mean": 0.0003179357873932531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008513660141034052, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 615.411865234375, "completions/mean_terminated_length": 535.9463500976562, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 12.979877515310585, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 808859143.0, "reward": 0.5524553656578064, "reward_std": 0.1967414915561676, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1389 }, { "clip_ratio/high_max": 0.001653337037168967, "clip_ratio/high_mean": 0.0005041189310759364, "clip_ratio/low_mean": 0.00041369527457391087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009178142099699471, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 656.328125, "completions/mean_terminated_length": 585.8109741210938, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 12.989209682123068, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.027, "num_tokens": 809462053.0, "reward": 0.5569196939468384, "reward_std": 0.22368991374969482, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1390 }, { "clip_ratio/high_max": 0.0015501457201025914, "clip_ratio/high_mean": 0.000498158777190838, "clip_ratio/low_mean": 0.00030075828976805496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007989170762812137, "completions/clipped_ratio": 0.011363636363636354, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 645.76708984375, "completions/mean_terminated_length": 606.1091918945312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 12.998541848935549, "grad_norm": 0.11279296875, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 810060617.0, "reward": 0.5033482313156128, "reward_std": 0.2123749852180481, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1391 }, { "clip_ratio/high_max": 0.0014908629564160947, "clip_ratio/high_mean": 0.00046155454947438557, "clip_ratio/low_mean": 0.0004029211452234449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008644757017464144, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 638.1596069335938, "completions/mean_terminated_length": 567.2699584960938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.009332166812483, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 810647328.0, "reward": 0.5223214626312256, "reward_std": 0.21372976899147034, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1392 }, { "clip_ratio/high_max": 0.0019903739121218678, "clip_ratio/high_mean": 0.000658585734981898, "clip_ratio/low_mean": 0.00034729962862911634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001005885374979698, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 667.21875, "completions/mean_terminated_length": 588.93603515625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.018664333624963, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 811242604.0, "reward": 0.5848214626312256, "reward_std": 0.2144126296043396, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1393 }, { "clip_ratio/high_max": 0.0017515275121695595, "clip_ratio/high_mean": 0.0005732004801757284, "clip_ratio/low_mean": 0.000437591222635092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010107917132700095, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 641.200927734375, "completions/mean_terminated_length": 606.1465454101562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.027996500437446, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 811870784.0, "reward": 0.4810267984867096, "reward_std": 0.24164429306983948, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991896748542786, "step": 1394 }, { "clip_ratio/high_max": 0.0017871686850412516, "clip_ratio/high_mean": 0.0005313890735578752, "clip_ratio/low_mean": 0.00039288076459342847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000924269836559688, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2221.0, "completions/mean_length": 547.8861694335938, "completions/mean_terminated_length": 511.8849792480469, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 13.037328667249927, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 812412354.0, "reward": 0.5736607313156128, "reward_std": 0.18922775983810425, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1395 }, { "clip_ratio/high_max": 0.0012242884558872902, "clip_ratio/high_mean": 0.00033330466567349504, "clip_ratio/low_mean": 0.0003139990266163295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006473036974057322, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 601.6160888671875, "completions/mean_terminated_length": 538.081787109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.04666083406241, "grad_norm": 0.11767578125, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 812977906.0, "reward": 0.5234375, "reward_std": 0.16010887920856476, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1396 }, { "clip_ratio/high_max": 0.0014899282905389555, "clip_ratio/high_mean": 0.00045767979509037104, "clip_ratio/low_mean": 0.0003265507307332882, "clip_ratio/low_min": 1.0434057003294583e-05, "clip_ratio/region_mean": 0.0007842305321901222, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 578.6585083007812, "completions/mean_terminated_length": 534.9401245117188, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 13.05599300087489, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 813544280.0, "reward": 0.6026785969734192, "reward_std": 0.21176443994045258, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1397 }, { "clip_ratio/high_max": 0.00198770187489572, "clip_ratio/high_mean": 0.0006603341907975846, "clip_ratio/low_mean": 0.0004361434478141746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010964776374748908, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2008.0, "completions/mean_length": 573.0736694335938, "completions/mean_terminated_length": 533.3115234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 13.065325167687373, "grad_norm": 0.1611328125, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 814094682.0, "reward": 0.6049107313156128, "reward_std": 0.26373884081840515, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1398 }, { "clip_ratio/high_max": 0.0017021954217852908, "clip_ratio/high_mean": 0.0005294370050705766, "clip_ratio/low_mean": 0.0003776409980673634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009070780097317765, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 606.1317138671875, "completions/mean_terminated_length": 562.7548217773438, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 13.074657334499854, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 814681144.0, "reward": 0.546875, "reward_std": 0.21835143864154816, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1399 }, { "clip_ratio/high_max": 0.0017483852880104678, "clip_ratio/high_mean": 0.0004886082367647759, "clip_ratio/low_mean": 0.0003104416707628843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007990499070729129, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 634.896240234375, "completions/mean_terminated_length": 543.710205078125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 13.083989501312336, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 815252739.0, "reward": 0.5625, "reward_std": 0.22161512076854706, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1400 }, { "clip_ratio/high_max": 0.0018322273699595826, "clip_ratio/high_mean": 0.0005806700842185819, "clip_ratio/low_mean": 0.0003205304074072046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009012004911710392, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3697.0, "completions/mean_length": 640.1529541015625, "completions/mean_terminated_length": 597.1989135742188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 13.093321668124817, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 815863092.0, "reward": 0.5558035969734192, "reward_std": 0.2427729368209839, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1401 }, { "clip_ratio/high_max": 0.0016442310152342543, "clip_ratio/high_mean": 0.0005161672067970358, "clip_ratio/low_mean": 0.00031519197102625185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008313591815749533, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 600.4319458007812, "completions/mean_terminated_length": 572.9077758789062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.1026538349373, "grad_norm": 0.12158203125, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 816464631.0, "reward": 0.5993303656578064, "reward_std": 0.1950497031211853, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1402 }, { "clip_ratio/high_max": 0.001461252688386594, "clip_ratio/high_mean": 0.0003993860646005487, "clip_ratio/low_mean": 0.00027730073372822517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006766867950318556, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 595.8359375, "completions/mean_terminated_length": 544.3046264648438, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 13.11198600174978, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 817040612.0, "reward": 0.5223214626312256, "reward_std": 0.17084869742393494, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1403 }, { "clip_ratio/high_max": 0.0015426845184265403, "clip_ratio/high_mean": 0.0004398583009788126, "clip_ratio/low_mean": 0.00029567180195044784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007355300995186553, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2705.0, "completions/mean_length": 625.4710083007812, "completions/mean_terminated_length": 570.3832397460938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 13.121318168562263, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 817635210.0, "reward": 0.5323660969734192, "reward_std": 0.20688749849796295, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1404 }, { "clip_ratio/high_max": 0.0011363478852217668, "clip_ratio/high_mean": 0.00033415246343793115, "clip_ratio/low_mean": 0.0003630368685207941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006971893290028675, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 694.427490234375, "completions/mean_terminated_length": 636.511962890625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.130650335374744, "grad_norm": 0.1083984375, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 818291897.0, "reward": 0.4497767984867096, "reward_std": 0.19058279693126678, "rewards/verify_math_reward/mean": 0.4497767984867096, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1405 }, { "clip_ratio/high_max": 0.0014495681762127788, "clip_ratio/high_mean": 0.00044815292858402245, "clip_ratio/low_mean": 0.0003247402498800511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000772893182329426, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3978.0, "completions/mean_length": 674.888427734375, "completions/mean_terminated_length": 584.7560424804688, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 13.139982502187227, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 818896533.0, "reward": 0.5502232313156128, "reward_std": 0.20932631194591522, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1406 }, { "clip_ratio/high_max": 0.0020581455355568323, "clip_ratio/high_mean": 0.0005896722814213717, "clip_ratio/low_mean": 0.0003218375377400662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009115098077927541, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 592.114990234375, "completions/mean_terminated_length": 536.4977416992188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.149314668999708, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 819450604.0, "reward": 0.5725446939468384, "reward_std": 0.21711428463459015, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1407 }, { "clip_ratio/high_max": 0.00174430378319812, "clip_ratio/high_mean": 0.00046465968171105487, "clip_ratio/low_mean": 0.0003192824785855919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007839421646167466, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2591.0, "completions/mean_length": 665.7957763671875, "completions/mean_terminated_length": 559.2186279296875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "epoch": 13.15864683581219, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 820030469.0, "reward": 0.5223214626312256, "reward_std": 0.19602863490581512, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1408 }, { "clip_ratio/high_max": 0.0016913519211811945, "clip_ratio/high_mean": 0.0004851412346624784, "clip_ratio/low_mean": 0.0002790674901689272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007642087211934268, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 587.4342041015625, "completions/mean_terminated_length": 535.7791748046875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 13.167979002624673, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 820610154.0, "reward": 0.4877232313156128, "reward_std": 0.17490725219249725, "rewards/verify_math_reward/mean": 0.4877232015132904, "rewards/verify_math_reward/std": 0.500128448009491, "step": 1409 }, { "clip_ratio/high_max": 0.0017227934913535137, "clip_ratio/high_mean": 0.0005312004236657231, "clip_ratio/low_mean": 0.0003712047946464736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009024052496897639, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 584.6261596679688, "completions/mean_terminated_length": 532.9297485351562, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 13.177311169437154, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 821174931.0, "reward": 0.6350446939468384, "reward_std": 0.20339377224445343, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1410 }, { "clip_ratio/high_max": 0.001779132331648725, "clip_ratio/high_mean": 0.0005360412642403389, "clip_ratio/low_mean": 0.0004373191009108268, "clip_ratio/low_min": 1.0632867997628637e-05, "clip_ratio/region_mean": 0.0009733603583299555, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3741.0, "completions/mean_length": 596.0971069335938, "completions/mean_terminated_length": 528.4083862304688, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 13.186643336249636, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 821723786.0, "reward": 0.5892857313156128, "reward_std": 0.23736734688282013, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1411 }, { "clip_ratio/high_max": 0.0015953919264575234, "clip_ratio/high_mean": 0.0004669561702712599, "clip_ratio/low_mean": 0.0004391115152202474, "clip_ratio/low_min": 1.5401676137116738e-05, "clip_ratio/region_mean": 0.0009060676734407025, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 606.1015625, "completions/mean_terminated_length": 570.6910400390625, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 13.195975503062117, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 822324837.0, "reward": 0.470982164144516, "reward_std": 0.22590160369873047, "rewards/verify_math_reward/mean": 0.4709821343421936, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1412 }, { "clip_ratio/high_max": 0.0014832056222076062, "clip_ratio/high_mean": 0.00043488904896094027, "clip_ratio/low_mean": 0.0003625766225923144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007974656627993681, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 597.9230346679688, "completions/mean_terminated_length": 558.4413452148438, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.2053076698746, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 822908888.0, "reward": 0.5569196939468384, "reward_std": 0.21504385769367218, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1413 }, { "clip_ratio/high_max": 0.0016544329637326882, "clip_ratio/high_mean": 0.0005267449384973588, "clip_ratio/low_mean": 0.000406088698809981, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009328336482212762, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 630.1160888671875, "completions/mean_terminated_length": 563.0853271484375, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 13.21463983668708, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 823492536.0, "reward": 0.5446428656578064, "reward_std": 0.24904923141002655, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1414 }, { "clip_ratio/high_max": 0.0016178957084775902, "clip_ratio/high_mean": 0.0004918382628602558, "clip_ratio/low_mean": 0.00030420989901358553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007960481489135418, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 647.59375, "completions/mean_terminated_length": 588.880859375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.223972003499563, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 824107836.0, "reward": 0.5736607313156128, "reward_std": 0.1931743025779724, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1415 }, { "clip_ratio/high_max": 0.0018230609248348628, "clip_ratio/high_mean": 0.000527394566233852, "clip_ratio/low_mean": 0.0003095360673341929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008369306497115758, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 553.671875, "completions/mean_terminated_length": 509.6429443359375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 13.233304170312044, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 824654734.0, "reward": 0.5569196939468384, "reward_std": 0.19846926629543304, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1416 }, { "clip_ratio/high_max": 0.0016410772277595242, "clip_ratio/high_mean": 0.0005012257963699085, "clip_ratio/low_mean": 0.00030725744647952524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008084832384156471, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2627.0, "completions/mean_length": 591.833740234375, "completions/mean_terminated_length": 572.1694946289062, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 13.242636337124527, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 825244777.0, "reward": 0.5758928656578064, "reward_std": 0.20064888894557953, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448275566101074, "step": 1417 }, { "clip_ratio/high_max": 0.0015912118806227227, "clip_ratio/high_mean": 0.00045222694188851165, "clip_ratio/low_mean": 0.0002968669411416158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007490938860428287, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 616.5569458007812, "completions/mean_terminated_length": 565.3306884765625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 13.251968503937007, "grad_norm": 0.1103515625, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 825837188.0, "reward": 0.5558035969734192, "reward_std": 0.18659871816635132, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1418 }, { "clip_ratio/high_max": 0.001553700647491496, "clip_ratio/high_mean": 0.0005001623684393053, "clip_ratio/low_mean": 0.0003352726730554423, "clip_ratio/low_min": 1.1007397006324027e-05, "clip_ratio/region_mean": 0.0008354350493391394, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2727.0, "completions/mean_length": 637.638427734375, "completions/mean_terminated_length": 594.6531372070312, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 13.26130067074949, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 826452280.0, "reward": 0.543526828289032, "reward_std": 0.21951329708099365, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1419 }, { "clip_ratio/high_max": 0.0013382088554863003, "clip_ratio/high_mean": 0.0004431610839219502, "clip_ratio/low_mean": 0.0003010817285939993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007442428141075652, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 586.2745971679688, "completions/mean_terminated_length": 522.4613647460938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 13.27063283756197, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 827006926.0, "reward": 0.6316964626312256, "reward_std": 0.2113974541425705, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1420 }, { "clip_ratio/high_max": 0.0016555947495362489, "clip_ratio/high_mean": 0.00048758478237687086, "clip_ratio/low_mean": 0.00029536799843299377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007829527758076438, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3334.0, "completions/mean_length": 552.4810791015625, "completions/mean_terminated_length": 504.37896728515625, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 13.279965004374453, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 827533645.0, "reward": 0.5625, "reward_std": 0.1928700953722, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1421 }, { "clip_ratio/high_max": 0.00183762901724549, "clip_ratio/high_mean": 0.0005509638019702834, "clip_ratio/low_mean": 0.0003238330432395742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008747968468014733, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 615.818115234375, "completions/mean_terminated_length": 556.5641479492188, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 13.289297171186934, "grad_norm": 0.11962890625, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 828116050.0, "reward": 0.5446428656578064, "reward_std": 0.20658259093761444, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1422 }, { "clip_ratio/high_max": 0.001617481862922432, "clip_ratio/high_mean": 0.0004946846997881948, "clip_ratio/low_mean": 0.00028170491623313865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000776389628299512, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3567.0, "completions/mean_length": 621.8560791015625, "completions/mean_terminated_length": 550.6321411132812, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 13.298629337999417, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 828688833.0, "reward": 0.59375, "reward_std": 0.21330790221691132, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1423 }, { "clip_ratio/high_max": 0.001688760930846911, "clip_ratio/high_mean": 0.0005318934149727284, "clip_ratio/low_mean": 0.0003530990504714282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008849924656715302, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 588.5614013671875, "completions/mean_terminated_length": 572.8330078125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 13.307961504811898, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 829293224.0, "reward": 0.5212053656578064, "reward_std": 0.22063983976840973, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1424 }, { "clip_ratio/high_max": 0.0016055243195296498, "clip_ratio/high_mean": 0.00042092477588084876, "clip_ratio/low_mean": 0.0003820010576873756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008029258333408507, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 624.0982666015625, "completions/mean_terminated_length": 572.9829711914062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.31729367162438, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0233, "num_tokens": 829879392.0, "reward": 0.4665178656578064, "reward_std": 0.19441214203834534, "rewards/verify_math_reward/mean": 0.4665178656578064, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1425 }, { "clip_ratio/high_max": 0.001567191261528933, "clip_ratio/high_mean": 0.000487123599214101, "clip_ratio/low_mean": 0.0003807999275977636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008679235115778283, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2740.0, "completions/mean_length": 571.1964721679688, "completions/mean_terminated_length": 547.4337158203125, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 13.326625838436861, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 830451184.0, "reward": 0.578125, "reward_std": 0.19625869393348694, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1426 }, { "clip_ratio/high_max": 0.0018317081994609907, "clip_ratio/high_mean": 0.0005963384990081977, "clip_ratio/low_mean": 0.00035139673855155706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009477352523390437, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 665.9609375, "completions/mean_terminated_length": 587.6495361328125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 13.335958005249344, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 831052829.0, "reward": 0.5368303656578064, "reward_std": 0.23206490278244019, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1427 }, { "clip_ratio/high_max": 0.0015894861026026774, "clip_ratio/high_mean": 0.00048236274869850604, "clip_ratio/low_mean": 0.0003868053936457727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008691681387062999, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 627.0859375, "completions/mean_terminated_length": 579.9966430664062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 13.345290172061826, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 831653826.0, "reward": 0.5424107313156128, "reward_std": 0.22409965097904205, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1428 }, { "clip_ratio/high_max": 0.001791535582015058, "clip_ratio/high_mean": 0.0005492749760378501, "clip_ratio/low_mean": 0.00029483045170763944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008441054242211976, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3727.0, "completions/mean_length": 574.4721069335938, "completions/mean_terminated_length": 522.6262817382812, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 13.354622338874307, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 832197241.0, "reward": 0.6104910969734192, "reward_std": 0.1893460899591446, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 1429 }, { "clip_ratio/high_max": 0.0019983718993898947, "clip_ratio/high_mean": 0.0005938726353633683, "clip_ratio/low_mean": 0.00038011708011254086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009739897222971194, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 597.3225708007812, "completions/mean_terminated_length": 545.8131103515625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 13.36395450568679, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 832759258.0, "reward": 0.582589328289032, "reward_std": 0.21831823885440826, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 1430 }, { "clip_ratio/high_max": 0.0020299209836593946, "clip_ratio/high_mean": 0.000690493546471771, "clip_ratio/low_mean": 0.0003531034808474942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010435970270918915, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 543.1473388671875, "completions/mean_terminated_length": 503.04742431640625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 13.37328667249927, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 833300054.0, "reward": 0.598214328289032, "reward_std": 0.21846021711826324, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 1431 }, { "clip_ratio/high_max": 0.0014582962166969082, "clip_ratio/high_mean": 0.0004073754737419222, "clip_ratio/low_mean": 0.0004089128991608959, "clip_ratio/low_min": 1.6267569662886672e-05, "clip_ratio/region_mean": 0.0008162883814293309, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 604.2421875, "completions/mean_terminated_length": 564.8318481445312, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 13.382618839311753, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 833892503.0, "reward": 0.5524553656578064, "reward_std": 0.2141755372285843, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1432 }, { "clip_ratio/high_max": 0.0016004578310457873, "clip_ratio/high_mean": 0.0005155829067007289, "clip_ratio/low_mean": 0.00028066771028534276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007962506188050611, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3435.0, "completions/mean_length": 611.8671875, "completions/mean_terminated_length": 548.519287109375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 13.391951006124234, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 834469672.0, "reward": 0.5803571939468384, "reward_std": 0.20001837611198425, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1433 }, { "clip_ratio/high_max": 0.0018618465674080653, "clip_ratio/high_mean": 0.0005791701880752953, "clip_ratio/low_mean": 0.00035893432254852087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009381045229019946, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 556.9710083007812, "completions/mean_terminated_length": 504.86749267578125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 13.401283172936717, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": 0.0184, "num_tokens": 835007846.0, "reward": 0.6171875, "reward_std": 0.20035536587238312, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1434 }, { "clip_ratio/high_max": 0.0018208273340860615, "clip_ratio/high_mean": 0.0005466261397941707, "clip_ratio/low_mean": 0.0003893457421781932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009359718769701431, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3442.0, "completions/mean_length": 598.4933471679688, "completions/mean_terminated_length": 547.0010986328125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 13.410615339749198, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 835570056.0, "reward": 0.5892857313156128, "reward_std": 0.20816446840763092, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1435 }, { "clip_ratio/high_max": 0.002047443776973523, "clip_ratio/high_mean": 0.0006194652796693845, "clip_ratio/low_mean": 0.00033381256344000576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009532778394714114, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 628.4654541015625, "completions/mean_terminated_length": 557.3770141601562, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 13.41994750656168, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 836158121.0, "reward": 0.5535714626312256, "reward_std": 0.2154536247253418, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 1436 }, { "clip_ratio/high_max": 0.0015440452807524707, "clip_ratio/high_mean": 0.0005259499757812591, "clip_ratio/low_mean": 0.0003068337643981067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008327837385877501, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 630.5803833007812, "completions/mean_terminated_length": 583.5385131835938, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 13.429279673374161, "grad_norm": 0.11669921875, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 836760785.0, "reward": 0.551339328289032, "reward_std": 0.2310425192117691, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1437 }, { "clip_ratio/high_max": 0.0014194907153068925, "clip_ratio/high_mean": 0.00044470407328844885, "clip_ratio/low_mean": 0.00031971871101177385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000764422770771489, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 586.0111694335938, "completions/mean_terminated_length": 530.2970581054688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 13.438611840186644, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 837320075.0, "reward": 0.5948660969734192, "reward_std": 0.19204454123973846, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 1438 }, { "clip_ratio/high_max": 0.0017518545373604866, "clip_ratio/high_mean": 0.0004434636229007083, "clip_ratio/low_mean": 0.00035267474231659435, "clip_ratio/low_min": 1.033741318678949e-05, "clip_ratio/region_mean": 0.0007961383798829047, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 645.625, "completions/mean_terminated_length": 570.8734130859375, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 13.447944006999125, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 837916267.0, "reward": 0.5111607313156128, "reward_std": 0.18829189240932465, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1439 }, { "clip_ratio/high_max": 0.001641486342123244, "clip_ratio/high_mean": 0.0005413385501924495, "clip_ratio/low_mean": 0.00036927487428783934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009106134311878122, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 612.4096069335938, "completions/mean_terminated_length": 573.0914306640625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 13.457276173811607, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0213, "num_tokens": 838507378.0, "reward": 0.5870535969734192, "reward_std": 0.21797415614128113, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1440 }, { "clip_ratio/high_max": 0.0014924777042324422, "clip_ratio/high_mean": 0.0004368835213881539, "clip_ratio/low_mean": 0.00033707786394643335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007739613852209004, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 617.4754638671875, "completions/mean_terminated_length": 586.1373901367188, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 13.466608340624088, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 839119572.0, "reward": 0.5323660969734192, "reward_std": 0.19858945906162262, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1441 }, { "clip_ratio/high_max": 0.001487494842876913, "clip_ratio/high_mean": 0.00041717613407854515, "clip_ratio/low_mean": 0.0002736194227281885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006907955425958789, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 612.1451416015625, "completions/mean_terminated_length": 552.82861328125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 13.47594050743657, "grad_norm": 0.111328125, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 839693350.0, "reward": 0.5837053656578064, "reward_std": 0.1565753072500229, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 1442 }, { "clip_ratio/high_max": 0.0020271714784030337, "clip_ratio/high_mean": 0.0006483817498974531, "clip_ratio/low_mean": 0.0003719502656167606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010203320016444195, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2159.0, "completions/mean_length": 574.8359375, "completions/mean_terminated_length": 527.037353515625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 13.485272674249051, "grad_norm": 0.1513671875, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 840259067.0, "reward": 0.582589328289032, "reward_std": 0.23311659693717957, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1443 }, { "clip_ratio/high_max": 0.001726871063510771, "clip_ratio/high_mean": 0.0005453907610899478, "clip_ratio/low_mean": 0.00031528299075489485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008606737665104447, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 629.575927734375, "completions/mean_terminated_length": 558.51025390625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 13.494604841061534, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 840837527.0, "reward": 0.5658482313156128, "reward_std": 0.23172719776630402, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1444 }, { "clip_ratio/high_max": 0.001498488769357209, "clip_ratio/high_mean": 0.0004240192091629069, "clip_ratio/low_mean": 0.0003190860571748999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007431052504216495, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3298.0, "completions/mean_length": 603.7779541015625, "completions/mean_terminated_length": 540.282958984375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.503937007874015, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 841403920.0, "reward": 0.5558035969734192, "reward_std": 0.21229758858680725, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1445 }, { "clip_ratio/high_max": 0.0016854322220751783, "clip_ratio/high_mean": 0.00044098968282924034, "clip_ratio/low_mean": 0.0003208507280305639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007618404097229359, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3324.0, "completions/mean_length": 632.3270263671875, "completions/mean_terminated_length": 569.3511352539062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.513269174686497, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 841988813.0, "reward": 0.5245535969734192, "reward_std": 0.19512708485126495, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 1446 }, { "clip_ratio/high_max": 0.0015337583636210184, "clip_ratio/high_mean": 0.0004813868329165416, "clip_ratio/low_mean": 0.0003618764460497914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000843263269871386, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 604.5870971679688, "completions/mean_terminated_length": 549.1677856445312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 13.52260134149898, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0173, "num_tokens": 842575123.0, "reward": 0.4966517984867096, "reward_std": 0.19967137277126312, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1447 }, { "clip_ratio/high_max": 0.001459809553125524, "clip_ratio/high_mean": 0.0004290812088356688, "clip_ratio/low_mean": 0.00036512190786197607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007942031129459792, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 609.6484375, "completions/mean_terminated_length": 558.3204956054688, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 13.531933508311461, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0128, "num_tokens": 843149120.0, "reward": 0.5714285969734192, "reward_std": 0.20041997730731964, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1448 }, { "clip_ratio/high_max": 0.00186413984556566, "clip_ratio/high_mean": 0.0006101725743974384, "clip_ratio/low_mean": 0.0004602289575359464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010704015476221684, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 579.1674194335938, "completions/mean_terminated_length": 527.3906860351562, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 13.541265675123944, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.0251, "num_tokens": 843704486.0, "reward": 0.5770089626312256, "reward_std": 0.2458515763282776, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099617958069, "step": 1449 }, { "clip_ratio/high_max": 0.0016106554185171262, "clip_ratio/high_mean": 0.00058202399941365, "clip_ratio/low_mean": 0.0003739048650004406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009559288673699484, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 622.8303833007812, "completions/mean_terminated_length": 547.5849609375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.550597841936424, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 844285206.0, "reward": 0.5725446939468384, "reward_std": 0.2361224889755249, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1450 }, { "clip_ratio/high_max": 0.001712963504360232, "clip_ratio/high_mean": 0.0005939340901477408, "clip_ratio/low_mean": 0.0003948019037807171, "clip_ratio/low_min": 9.701955605123658e-06, "clip_ratio/region_mean": 0.0009887360083666863, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 633.4609375, "completions/mean_terminated_length": 558.44580078125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.559930008748907, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 844855227.0, "reward": 0.59375, "reward_std": 0.250397264957428, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1451 }, { "clip_ratio/high_max": 0.0017048090921889525, "clip_ratio/high_mean": 0.0005401893226917309, "clip_ratio/low_mean": 0.0003728963090452453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009130856333285919, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 581.3314819335938, "completions/mean_terminated_length": 541.6625366210938, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.569262175561388, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 845418372.0, "reward": 0.5558035969734192, "reward_std": 0.24142493307590485, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1452 }, { "clip_ratio/high_max": 0.0016730862444092054, "clip_ratio/high_mean": 0.0005307649726091768, "clip_ratio/low_mean": 0.0003588253082398296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008895902801668853, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3208.0, "completions/mean_length": 579.8125, "completions/mean_terminated_length": 552.1259765625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 13.57859434237387, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 845995460.0, "reward": 0.5892857313156128, "reward_std": 0.24296332895755768, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1453 }, { "clip_ratio/high_max": 0.0017078154978662496, "clip_ratio/high_mean": 0.0005719310120184673, "clip_ratio/low_mean": 0.00034637552198546473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009183065421893843, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1959.0, "completions/mean_length": 570.5569458007812, "completions/mean_terminated_length": 526.7378540039062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 13.587926509186351, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 846555135.0, "reward": 0.5658482313156128, "reward_std": 0.20906810462474823, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1454 }, { "clip_ratio/high_max": 0.0016499298199050827, "clip_ratio/high_mean": 0.0005033507677580928, "clip_ratio/low_mean": 0.00030773112473525543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008110818980640033, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3552.0, "completions/mean_length": 636.849365234375, "completions/mean_terminated_length": 577.9534912109375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.597258675998834, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 847160584.0, "reward": 0.5323660969734192, "reward_std": 0.22910040616989136, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1455 }, { "clip_ratio/high_max": 0.001650757820243598, "clip_ratio/high_mean": 0.000492941169113692, "clip_ratio/low_mean": 0.00042119161321352294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009141327755060047, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 676.140625, "completions/mean_terminated_length": 594.06396484375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 13.606590842811315, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 847776766.0, "reward": 0.4977678656578064, "reward_std": 0.2218119502067566, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1456 }, { "clip_ratio/high_max": 0.0014967691386118531, "clip_ratio/high_mean": 0.00041895810500136577, "clip_ratio/low_mean": 0.0003660171489627828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000784975256465259, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2809.0, "completions/mean_length": 641.1127319335938, "completions/mean_terminated_length": 594.2138061523438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 13.615923009623797, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 848388547.0, "reward": 0.5379464626312256, "reward_std": 0.20102757215499878, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1457 }, { "clip_ratio/high_max": 0.0019857244224112947, "clip_ratio/high_mean": 0.0006267321759878541, "clip_ratio/low_mean": 0.0003349709638769127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009617031373636564, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2069.0, "completions/mean_length": 547.4420166015625, "completions/mean_terminated_length": 503.3356018066406, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 13.625255176436278, "grad_norm": 0.14453125, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 848923095.0, "reward": 0.6205357313156128, "reward_std": 0.20749297738075256, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1458 }, { "clip_ratio/high_max": 0.0014709434744872851, "clip_ratio/high_mean": 0.0004454475731563434, "clip_ratio/low_mean": 0.00033659933069429826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000782046911353973, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 630.044677734375, "completions/mean_terminated_length": 579.0169677734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 13.63458734324876, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 849530199.0, "reward": 0.5066964626312256, "reward_std": 0.22109587490558624, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1459 }, { "clip_ratio/high_max": 0.0015920209752948722, "clip_ratio/high_mean": 0.0005085988584596635, "clip_ratio/low_mean": 0.00031790173829904234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008265006072178949, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 620.5279541015625, "completions/mean_terminated_length": 581.3013916015625, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 13.643919510061242, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 850140056.0, "reward": 0.5569196939468384, "reward_std": 0.227440744638443, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1460 }, { "clip_ratio/high_max": 0.0019153909706801642, "clip_ratio/high_mean": 0.0006843581222710782, "clip_ratio/low_mean": 0.00037447444765348337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010588325430944678, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 654.609375, "completions/mean_terminated_length": 588.0523071289062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 13.653251676873724, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 850737466.0, "reward": 0.59375, "reward_std": 0.26144862174987793, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1461 }, { "clip_ratio/high_max": 0.0016420078009105055, "clip_ratio/high_mean": 0.000542934335044265, "clip_ratio/low_mean": 0.000367502664175845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009104369910346577, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 595.6517944335938, "completions/mean_terminated_length": 540.0906982421875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.662583843686207, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 851305154.0, "reward": 0.5636160969734192, "reward_std": 0.22646361589431763, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1462 }, { "clip_ratio/high_max": 0.001556218838231871, "clip_ratio/high_mean": 0.00041317227328363515, "clip_ratio/low_mean": 0.00039252614828910737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008056984324866789, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 643.9765625, "completions/mean_terminated_length": 581.2124633789062, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 13.671916010498688, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 851904061.0, "reward": 0.5345982313156128, "reward_std": 0.20771758258342743, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1463 }, { "clip_ratio/high_max": 0.0016773997294876608, "clip_ratio/high_mean": 0.0005545242436255648, "clip_ratio/low_mean": 0.0003654265331078932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009199507826451736, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 625.880615234375, "completions/mean_terminated_length": 566.7979736328125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.68124817731117, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 852488098.0, "reward": 0.5145089626312256, "reward_std": 0.2098594754934311, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 1464 }, { "clip_ratio/high_max": 0.0015371305908047361, "clip_ratio/high_mean": 0.00045309656434255885, "clip_ratio/low_mean": 0.0004256896056631376, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008787861770542804, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 590.6763916015625, "completions/mean_terminated_length": 535.0363159179688, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 13.690580344123651, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 853056440.0, "reward": 0.5479910969734192, "reward_std": 0.24130292236804962, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1465 }, { "clip_ratio/high_max": 0.00148297511623241, "clip_ratio/high_mean": 0.00047140210608631605, "clip_ratio/low_mean": 0.0002479121314991062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007193142419055221, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 594.1495971679688, "completions/mean_terminated_length": 554.6253051757812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 13.699912510936134, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 853626206.0, "reward": 0.5803571939468384, "reward_std": 0.21301506459712982, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1466 }, { "clip_ratio/high_max": 0.0017450528794142883, "clip_ratio/high_mean": 0.0005463054749270668, "clip_ratio/low_mean": 0.00040349560276808916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009498010849711136, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3506.0, "completions/mean_length": 656.2210083007812, "completions/mean_terminated_length": 573.666259765625, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 13.709244677748615, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 854212148.0, "reward": 0.5647321939468384, "reward_std": 0.22443737089633942, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1467 }, { "clip_ratio/high_max": 0.0016002151533029974, "clip_ratio/high_mean": 0.0004723479805761599, "clip_ratio/low_mean": 0.0003160101001640214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007883580801717471, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3317.0, "completions/mean_length": 605.2433471679688, "completions/mean_terminated_length": 537.7315063476562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.718576844561097, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 854786334.0, "reward": 0.5078125, "reward_std": 0.19899989664554596, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 1468 }, { "clip_ratio/high_max": 0.0017225371939275647, "clip_ratio/high_mean": 0.0005235312387412705, "clip_ratio/low_mean": 0.00027352411507308716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000797055363364052, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3139.0, "completions/mean_length": 653.8739013671875, "completions/mean_terminated_length": 618.9481201171875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.727909011373578, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 855425949.0, "reward": 0.5323660969734192, "reward_std": 0.21879860758781433, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1469 }, { "clip_ratio/high_max": 0.0018507699023757596, "clip_ratio/high_mean": 0.0005679615915141767, "clip_ratio/low_mean": 0.00032465870572195854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008926203026931034, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 629.3917846679688, "completions/mean_terminated_length": 570.368896484375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 13.73724117818606, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 856008156.0, "reward": 0.5892857313156128, "reward_std": 0.20347636938095093, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1470 }, { "clip_ratio/high_max": 0.0014815277327215881, "clip_ratio/high_mean": 0.0004707739190052962, "clip_ratio/low_mean": 0.0004055853219142591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008763592486502603, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3152.0, "completions/mean_length": 618.7120971679688, "completions/mean_terminated_length": 555.4886474609375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 13.746573344998541, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 856585322.0, "reward": 0.559151828289032, "reward_std": 0.22946879267692566, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1471 }, { "clip_ratio/high_max": 0.0020694473441835726, "clip_ratio/high_mean": 0.0006478015011452953, "clip_ratio/low_mean": 0.0003714814356499119, "clip_ratio/low_min": 1.5798786989762448e-05, "clip_ratio/region_mean": 0.0010192829267907655, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3581.0, "completions/mean_length": 597.46875, "completions/mean_terminated_length": 557.98193359375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 13.755905511811024, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 857178438.0, "reward": 0.5602678656578064, "reward_std": 0.22695399820804596, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1472 }, { "clip_ratio/high_max": 0.0012901640693598893, "clip_ratio/high_mean": 0.000432423684742389, "clip_ratio/low_mean": 0.0003156316291779149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000748055313124496, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 622.8292846679688, "completions/mean_terminated_length": 587.5885009765625, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 13.765237678623505, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 857783621.0, "reward": 0.546875, "reward_std": 0.20542281866073608, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1473 }, { "clip_ratio/high_max": 0.001881788402897655, "clip_ratio/high_mean": 0.0005327477763330535, "clip_ratio/low_mean": 0.0002711350437039073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008038828218559502, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 636.6517944335938, "completions/mean_terminated_length": 569.7474365234375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 13.774569845435988, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 858382333.0, "reward": 0.5189732313156128, "reward_std": 0.21537764370441437, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1474 }, { "clip_ratio/high_max": 0.0013809626379952533, "clip_ratio/high_mean": 0.0003932869999516697, "clip_ratio/low_mean": 0.0003436417129023539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00073692872592801, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 694.4777221679688, "completions/mean_terminated_length": 624.7426147460938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 13.783902012248468, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 859024601.0, "reward": 0.5189732313156128, "reward_std": 0.1976444274187088, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1475 }, { "clip_ratio/high_max": 0.0016969669432000956, "clip_ratio/high_mean": 0.00048578004702903854, "clip_ratio/low_mean": 0.0002999873190674407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007857673626858741, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 542.716552734375, "completions/mean_terminated_length": 522.7766723632812, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 13.793234179060951, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0172, "num_tokens": 859586923.0, "reward": 0.6015625, "reward_std": 0.19178099930286407, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 1476 }, { "clip_ratio/high_max": 0.0015827381357667036, "clip_ratio/high_mean": 0.0005133775662216067, "clip_ratio/low_mean": 0.00032172162298138574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008350991911356687, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3883.0, "completions/mean_length": 682.9141235351562, "completions/mean_terminated_length": 608.9703369140625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 13.802566345873432, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0094, "num_tokens": 860216302.0, "reward": 0.5066964626312256, "reward_std": 0.2126799076795578, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1477 }, { "clip_ratio/high_max": 0.0015752986255392898, "clip_ratio/high_mean": 0.0005237242266957765, "clip_ratio/low_mean": 0.00028728788254284154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008110121170830098, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 579.997802734375, "completions/mean_terminated_length": 556.2943725585938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 13.811898512685914, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 860789844.0, "reward": 0.6316964626312256, "reward_std": 0.19219858944416046, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1478 }, { "clip_ratio/high_max": 0.0014719496630277717, "clip_ratio/high_mean": 0.00041756537757464685, "clip_ratio/low_mean": 0.0003222300886136509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000739795465960924, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3409.0, "completions/mean_length": 629.3136596679688, "completions/mean_terminated_length": 574.286865234375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.821230679498395, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 861383901.0, "reward": 0.5178571939468384, "reward_std": 0.21068249642848969, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1479 }, { "clip_ratio/high_max": 0.0021846755626029335, "clip_ratio/high_mean": 0.0005392227506035852, "clip_ratio/low_mean": 0.00035902116201214085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008982439121609787, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 601.7288208007812, "completions/mean_terminated_length": 558.2971801757812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.830562846310878, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 861962402.0, "reward": 0.6205357313156128, "reward_std": 0.17645888030529022, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1480 }, { "clip_ratio/high_max": 0.0016209942241403041, "clip_ratio/high_mean": 0.0004635233783574222, "clip_ratio/low_mean": 0.00024232734313045512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007058507262627245, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 641.6986694335938, "completions/mean_terminated_length": 574.8919067382812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.83989501312336, "grad_norm": 0.11083984375, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 862550524.0, "reward": 0.5948660969734192, "reward_std": 0.17025348544120789, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 1481 }, { "clip_ratio/high_max": 0.001509284583335102, "clip_ratio/high_mean": 0.0004057079880794845, "clip_ratio/low_mean": 0.0003302452535081102, "clip_ratio/low_min": 1.0589630619506352e-05, "clip_ratio/region_mean": 0.0007359532310147188, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 730.3672485351562, "completions/mean_terminated_length": 641.6964721679688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.849227179935841, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 863210509.0, "reward": 0.4676339626312256, "reward_std": 0.19381438195705414, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1482 }, { "clip_ratio/high_max": 0.0016020727962313686, "clip_ratio/high_mean": 0.00046571843358833576, "clip_ratio/low_mean": 0.00030765953374611854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007733779739282909, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3446.0, "completions/mean_length": 616.265625, "completions/mean_terminated_length": 576.990966796875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.858559346748324, "grad_norm": 0.12255859375, "learning_rate": 1e-06, "loss": 0.0212, "num_tokens": 863814819.0, "reward": 0.5267857313156128, "reward_std": 0.192914679646492, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1483 }, { "clip_ratio/high_max": 0.0017778941637516255, "clip_ratio/high_mean": 0.0005326547875483811, "clip_ratio/low_mean": 0.0003499721511843745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008826269177006907, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 585.7154541015625, "completions/mean_terminated_length": 558.0753784179688, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 13.867891513560805, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0082, "num_tokens": 864400516.0, "reward": 0.5658482313156128, "reward_std": 0.2339816838502884, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1484 }, { "clip_ratio/high_max": 0.0016853345314302715, "clip_ratio/high_mean": 0.0004785012778256714, "clip_ratio/low_mean": 0.0004052058570778172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008837071363814175, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 595.6517944335938, "completions/mean_terminated_length": 544.1177368164062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 13.877223680373287, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 864962372.0, "reward": 0.5680803656578064, "reward_std": 0.23131422698497772, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1485 }, { "clip_ratio/high_max": 0.0013008546666242182, "clip_ratio/high_mean": 0.00036349351762510196, "clip_ratio/low_mean": 0.0003107999082203605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006742934281191992, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 673.2779541015625, "completions/mean_terminated_length": 562.8674926757812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.886555847185768, "grad_norm": 0.1064453125, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 865537853.0, "reward": 0.5457589626312256, "reward_std": 0.16743122041225433, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1486 }, { "clip_ratio/high_max": 0.0015954834507283522, "clip_ratio/high_mean": 0.000467176688289328, "clip_ratio/low_mean": 0.00035002035974684986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008171970466719358, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 612.8671875, "completions/mean_terminated_length": 545.5028076171875, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 13.89588801399825, "grad_norm": 0.119140625, "learning_rate": 1e-06, "loss": 0.0214, "num_tokens": 866106718.0, "reward": 0.5758928656578064, "reward_std": 0.19745828211307526, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1487 }, { "clip_ratio/high_max": 0.0015423828590428457, "clip_ratio/high_mean": 0.0004908759989348255, "clip_ratio/low_mean": 0.00025817459322752256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007490505854548246, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3394.0, "completions/mean_length": 565.6908569335938, "completions/mean_terminated_length": 533.8862915039062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.905220180810732, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": 0.0149, "num_tokens": 866658545.0, "reward": 0.6194196939468384, "reward_std": 0.1958438754081726, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1488 }, { "clip_ratio/high_max": 0.0019100382014585193, "clip_ratio/high_mean": 0.0006178386640840472, "clip_ratio/low_mean": 0.00033591721921766293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009537558908050414, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 571.3995971679688, "completions/mean_terminated_length": 547.63818359375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.914552347623214, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 867240855.0, "reward": 0.5658482313156128, "reward_std": 0.24299722909927368, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1489 }, { "clip_ratio/high_max": 0.0015912617272988427, "clip_ratio/high_mean": 0.0005427971918834373, "clip_ratio/low_mean": 0.0003104864886154246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00085328366367321, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3327.0, "completions/mean_length": 603.4788208007812, "completions/mean_terminated_length": 556.0690307617188, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 13.923884514435695, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 867820820.0, "reward": 0.578125, "reward_std": 0.225075364112854, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1490 }, { "clip_ratio/high_max": 0.0014498585023829946, "clip_ratio/high_mean": 0.0005081321653506166, "clip_ratio/low_mean": 0.0004397898109118614, "clip_ratio/low_min": 9.987216799345333e-06, "clip_ratio/region_mean": 0.0009479219620516233, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 617.09375, "completions/mean_terminated_length": 577.8284301757812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 13.933216681248178, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0169, "num_tokens": 868420024.0, "reward": 0.5290178656578064, "reward_std": 0.23811551928520203, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1491 }, { "clip_ratio/high_max": 0.0015793536795172258, "clip_ratio/high_mean": 0.0004564840305647522, "clip_ratio/low_mean": 0.0003703847811493688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008268688220596232, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2015.0, "completions/mean_length": 645.7232666015625, "completions/mean_terminated_length": 558.8741455078125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.942548848060659, "grad_norm": 0.12890625, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 868997648.0, "reward": 0.5189732313156128, "reward_std": 0.2004987597465515, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1492 }, { "clip_ratio/high_max": 0.0015132897642615717, "clip_ratio/high_mean": 0.0004594343321286942, "clip_ratio/low_mean": 0.0003326939034877796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007921282267489005, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1969.0, "completions/mean_length": 571.8192138671875, "completions/mean_terminated_length": 523.9796752929688, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 13.951881014873141, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 869548150.0, "reward": 0.6071428656578064, "reward_std": 0.20824255049228668, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1493 }, { "clip_ratio/high_max": 0.0014417561378650134, "clip_ratio/high_mean": 0.00041261564365413506, "clip_ratio/low_mean": 0.0003567041931091808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007693198467677576, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 661.607177734375, "completions/mean_terminated_length": 595.1854248046875, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 13.961213181685622, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 870167582.0, "reward": 0.5345982313156128, "reward_std": 0.20200368762016296, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1494 }, { "clip_ratio/high_max": 0.0016464744785480434, "clip_ratio/high_mean": 0.0004716879473107838, "clip_ratio/low_mean": 0.00029222255068361846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007639104969712207, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 694.708740234375, "completions/mean_terminated_length": 593.0609130859375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 13.970545348498105, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 870764633.0, "reward": 0.4955357313156128, "reward_std": 0.18475833535194397, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 1495 }, { "clip_ratio/high_max": 0.0019481853469187627, "clip_ratio/high_mean": 0.000563488020816294, "clip_ratio/low_mean": 0.00029821233408711123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008617003582003235, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2294.0, "completions/mean_length": 565.5670166015625, "completions/mean_terminated_length": 517.642578125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 13.979877515310585, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 871307429.0, "reward": 0.5993303656578064, "reward_std": 0.1954641044139862, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1496 }, { "clip_ratio/high_max": 0.0011723246516339714, "clip_ratio/high_mean": 0.00036326770009509346, "clip_ratio/low_mean": 0.00035125915496792004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007145268627937185, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3855.0, "completions/mean_length": 714.7020263671875, "completions/mean_terminated_length": 629.5892333984375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 13.989209682123068, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 871950738.0, "reward": 0.4754464626312256, "reward_std": 0.2298799455165863, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 1497 }, { "clip_ratio/high_max": 0.00160207767112297, "clip_ratio/high_mean": 0.0005525343447061459, "clip_ratio/low_mean": 0.0003215502217699395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008740845696593169, "completions/clipped_ratio": 0.017045454545454586, "completions/max_length": 4096.0, "completions/max_terminated_length": 1851.0, "completions/mean_length": 629.821044921875, "completions/mean_terminated_length": 569.7138671875, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 13.998541848935549, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 872546118.0, "reward": 0.6037946939468384, "reward_std": 0.22375266253948212, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1498 }, { "clip_ratio/high_max": 0.0019307765560370171, "clip_ratio/high_mean": 0.0005768025667975962, "clip_ratio/low_mean": 0.00034231782956339885, "clip_ratio/low_min": 9.4126507974579e-06, "clip_ratio/region_mean": 0.0009191203935188241, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 612.2846069335938, "completions/mean_terminated_length": 568.9841918945312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.009332166812483, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0176, "num_tokens": 873137581.0, "reward": 0.494419664144516, "reward_std": 0.21564865112304688, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 1499 }, { "clip_ratio/high_max": 0.0016640719213683042, "clip_ratio/high_mean": 0.0005200427185627632, "clip_ratio/low_mean": 0.0003144358354347787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008344785464942106, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 580.2310791015625, "completions/mean_terminated_length": 524.4251708984375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 14.018664333624963, "grad_norm": 0.1416015625, "learning_rate": 1e-06, "loss": -0.0141, "num_tokens": 873695812.0, "reward": 0.6071428656578064, "reward_std": 0.22702956199645996, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1500 }, { "clip_ratio/high_max": 0.0015412053862746689, "clip_ratio/high_mean": 0.0004509609387923774, "clip_ratio/low_mean": 0.00028261597503842495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007335769096243894, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 617.919677734375, "completions/mean_terminated_length": 586.5855712890625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 14.027996500437446, "grad_norm": 0.1162109375, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 874315228.0, "reward": 0.5011160969734192, "reward_std": 0.19643978774547577, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002780556678772, "step": 1501 }, { "clip_ratio/high_max": 0.001706761853711214, "clip_ratio/high_mean": 0.0005059305342456355, "clip_ratio/low_mean": 0.00043664882434768515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009425793559785234, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 664.1585083007812, "completions/mean_terminated_length": 573.7434692382812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 14.037328667249927, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 874897234.0, "reward": 0.5502232313156128, "reward_std": 0.2329639196395874, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1502 }, { "clip_ratio/high_max": 0.0017753144729795167, "clip_ratio/high_mean": 0.0005143590854004287, "clip_ratio/low_mean": 0.00023039474388042436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007447538259839348, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 608.5089721679688, "completions/mean_terminated_length": 553.1519165039062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.04666083406241, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 875475194.0, "reward": 0.559151828289032, "reward_std": 0.20069055259227753, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1503 }, { "clip_ratio/high_max": 0.001343896225989738, "clip_ratio/high_mean": 0.000393055019003441, "clip_ratio/low_mean": 0.00036993329968026956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007629883161826001, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 617.0301513671875, "completions/mean_terminated_length": 549.7462768554688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.05599300087489, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 876043581.0, "reward": 0.5524553656578064, "reward_std": 0.19246003031730652, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1504 }, { "clip_ratio/high_max": 0.0017084121882362524, "clip_ratio/high_mean": 0.0005181743806588202, "clip_ratio/low_mean": 0.00025313037485830137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007713047525612637, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 654.3326416015625, "completions/mean_terminated_length": 567.7001953125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 14.065325167687373, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 876627191.0, "reward": 0.5613839626312256, "reward_std": 0.19340112805366516, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1505 }, { "clip_ratio/high_max": 0.0014563648728653789, "clip_ratio/high_mean": 0.0003780712620482518, "clip_ratio/low_mean": 0.0002658752102888684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006439464754066648, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 615.8795166015625, "completions/mean_terminated_length": 548.5733642578125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 14.074657334499854, "grad_norm": 0.1142578125, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 877204155.0, "reward": 0.5837053656578064, "reward_std": 0.15139120817184448, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 1506 }, { "clip_ratio/high_max": 0.001548316153275664, "clip_ratio/high_mean": 0.0005630202476822888, "clip_ratio/low_mean": 0.0003481325754819409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009111528224821086, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 663.372802734375, "completions/mean_terminated_length": 585.0022583007812, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 14.083989501312336, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 877804793.0, "reward": 0.5736607313156128, "reward_std": 0.238377645611763, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1507 }, { "clip_ratio/high_max": 0.0017636547017900739, "clip_ratio/high_mean": 0.0005906411392970767, "clip_ratio/low_mean": 0.00040212186763710633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009927630071615567, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 561.1127319335938, "completions/mean_terminated_length": 529.2669067382812, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 14.093321668124817, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 878362686.0, "reward": 0.5848214626312256, "reward_std": 0.2366524040699005, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1508 }, { "clip_ratio/high_max": 0.001944740686667501, "clip_ratio/high_mean": 0.0005664587365572515, "clip_ratio/low_mean": 0.00035113504850414756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009175937939289724, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 631.0390625, "completions/mean_terminated_length": 576.0396728515625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 14.1026538349373, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": 0.0212, "num_tokens": 878962185.0, "reward": 0.5814732313156128, "reward_std": 0.2064424306154251, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1509 }, { "clip_ratio/high_max": 0.0017148217593785375, "clip_ratio/high_mean": 0.000559840604864803, "clip_ratio/low_mean": 0.0002501155640857178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008099561564449687, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1773.0, "completions/mean_length": 596.7522583007812, "completions/mean_terminated_length": 545.2344360351562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 14.11198600174978, "grad_norm": 0.1328125, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 879525859.0, "reward": 0.6428571939468384, "reward_std": 0.21237428486347198, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1510 }, { "clip_ratio/high_max": 0.0014186469570631743, "clip_ratio/high_mean": 0.000417981873511053, "clip_ratio/low_mean": 0.0004800662431989622, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008980481243270333, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 676.396240234375, "completions/mean_terminated_length": 602.311279296875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 14.121318168562263, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 880137638.0, "reward": 0.527901828289032, "reward_std": 0.22485104203224182, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1511 }, { "clip_ratio/high_max": 0.0020417628329596482, "clip_ratio/high_mean": 0.0006389721718278452, "clip_ratio/low_mean": 0.0003225239290713944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000961496096351766, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3525.0, "completions/mean_length": 563.3214721679688, "completions/mean_terminated_length": 523.44921875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 14.130650335374744, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 880681734.0, "reward": 0.606026828289032, "reward_std": 0.21507565677165985, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1512 }, { "clip_ratio/high_max": 0.0017885157867567614, "clip_ratio/high_mean": 0.0005276078309179866, "clip_ratio/low_mean": 0.0003105719601990131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008381797870242735, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3106.0, "completions/mean_length": 590.6138916015625, "completions/mean_terminated_length": 534.9727783203125, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 14.139982502187227, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 881247428.0, "reward": 0.5223214626312256, "reward_std": 0.179951474070549, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1513 }, { "clip_ratio/high_max": 0.0013858373376933741, "clip_ratio/high_mean": 0.0003639224679545805, "clip_ratio/low_mean": 0.0002737126949341473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000637635162775041, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3457.0, "completions/mean_length": 661.2533569335938, "completions/mean_terminated_length": 594.8247680664062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.149314668999708, "grad_norm": 0.11279296875, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 881864447.0, "reward": 0.5524553656578064, "reward_std": 0.15703065693378448, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1514 }, { "clip_ratio/high_max": 0.001705503991615842, "clip_ratio/high_mean": 0.0005129117788555959, "clip_ratio/low_mean": 0.00037262371779434034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008855355026753386, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 697.1250610351562, "completions/mean_terminated_length": 611.5697631835938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.15864683581219, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 882489399.0, "reward": 0.515625, "reward_std": 0.2118447721004486, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1515 }, { "clip_ratio/high_max": 0.0014235807529985323, "clip_ratio/high_mean": 0.00043980203270166385, "clip_ratio/low_mean": 0.00032022949631027586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007600315229865373, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 579.6495971679688, "completions/mean_terminated_length": 543.9706420898438, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 14.167979002624673, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0084, "num_tokens": 883061165.0, "reward": 0.6305803656578064, "reward_std": 0.17577557265758514, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 1516 }, { "clip_ratio/high_max": 0.001512436803750461, "clip_ratio/high_mean": 0.0004945711592654334, "clip_ratio/low_mean": 0.00040261408162223233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008971852421382209, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2490.0, "completions/mean_length": 557.388427734375, "completions/mean_terminated_length": 525.509033203125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 14.177311169437154, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 883616009.0, "reward": 0.5647321939468384, "reward_std": 0.24862739443778992, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1517 }, { "clip_ratio/high_max": 0.0017349476565868827, "clip_ratio/high_mean": 0.00048150656016332505, "clip_ratio/low_mean": 0.0003248850423460681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008063916006904037, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 644.3928833007812, "completions/mean_terminated_length": 589.60546875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 14.186643336249636, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 884221481.0, "reward": 0.5636160969734192, "reward_std": 0.21388381719589233, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1518 }, { "clip_ratio/high_max": 0.0019056037363043288, "clip_ratio/high_mean": 0.0006080793355067726, "clip_ratio/low_mean": 0.00029451267869262665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009025920026033418, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 676.5111694335938, "completions/mean_terminated_length": 562.1337890625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 14.195975503062117, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 884801843.0, "reward": 0.559151828289032, "reward_std": 0.20891445875167847, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1519 }, { "clip_ratio/high_max": 0.0019129214106214931, "clip_ratio/high_mean": 0.000607894981840218, "clip_ratio/low_mean": 0.00033826994956598355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009461649306103936, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3500.0, "completions/mean_length": 575.0569458007812, "completions/mean_terminated_length": 535.317138671875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 14.2053076698746, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 885365446.0, "reward": 0.5625, "reward_std": 0.24115532636642456, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1520 }, { "clip_ratio/high_max": 0.0014671868520963471, "clip_ratio/high_mean": 0.000437759687201833, "clip_ratio/low_mean": 0.0003206332690979252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007583929705106129, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2606.0, "completions/mean_length": 607.2020263671875, "completions/mean_terminated_length": 547.8013916015625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 14.21463983668708, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 885933395.0, "reward": 0.5580357313156128, "reward_std": 0.1994110643863678, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 1521 }, { "clip_ratio/high_max": 0.0015295231569325551, "clip_ratio/high_mean": 0.00046846458701566007, "clip_ratio/low_mean": 0.00036087475041313155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000829339332995005, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 636.3002319335938, "completions/mean_terminated_length": 585.3646240234375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 14.223972003499563, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0199, "num_tokens": 886531808.0, "reward": 0.5401785969734192, "reward_std": 0.22315604984760284, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1522 }, { "clip_ratio/high_max": 0.0016179913473024499, "clip_ratio/high_mean": 0.00044897663519805064, "clip_ratio/low_mean": 0.0004330608826421667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008820375146569859, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3295.0, "completions/mean_length": 633.3002319335938, "completions/mean_terminated_length": 594.2178344726562, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 14.233304170312044, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 887146581.0, "reward": 0.527901828289032, "reward_std": 0.2144811451435089, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1523 }, { "clip_ratio/high_max": 0.0014640017852798337, "clip_ratio/high_mean": 0.0003699388769291545, "clip_ratio/low_mean": 0.0003018170318682678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006717559081153013, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 626.224365234375, "completions/mean_terminated_length": 583.09716796875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 14.242636337124527, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 887751078.0, "reward": 0.5546875, "reward_std": 0.1699160635471344, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1524 }, { "clip_ratio/high_max": 0.0020246311214577872, "clip_ratio/high_mean": 0.0006806456217418599, "clip_ratio/low_mean": 0.00035072735158792057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001031372984925838, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3654.0, "completions/mean_length": 604.34375, "completions/mean_terminated_length": 564.9345703125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.251968503937007, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 888352490.0, "reward": 0.59375, "reward_std": 0.24180610477924347, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1525 }, { "clip_ratio/high_max": 0.0014662166013295064, "clip_ratio/high_mean": 0.0004219607668574099, "clip_ratio/low_mean": 0.0004305838472191681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008525446137355175, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 604.0703125, "completions/mean_terminated_length": 552.6602172851562, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 14.26130067074949, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 888930729.0, "reward": 0.5680803656578064, "reward_std": 0.21996724605560303, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1526 }, { "clip_ratio/high_max": 0.0015160213315539295, "clip_ratio/high_mean": 0.0004319961130931915, "clip_ratio/low_mean": 0.00036158949774289795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007935856101539684, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 614.4475708007812, "completions/mean_terminated_length": 571.1740112304688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 14.27063283756197, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 889534034.0, "reward": 0.520089328289032, "reward_std": 0.21192918717861176, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1527 }, { "clip_ratio/high_max": 0.001307474672103126, "clip_ratio/high_mean": 0.0003457803753690314, "clip_ratio/low_mean": 0.000377805635025652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007235860175569542, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 714.0904541015625, "completions/mean_terminated_length": 604.9965209960938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.279965004374453, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 890151883.0, "reward": 0.486607164144516, "reward_std": 0.20978349447250366, "rewards/verify_math_reward/mean": 0.4866071343421936, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1528 }, { "clip_ratio/high_max": 0.0016604755001026206, "clip_ratio/high_mean": 0.0005210502342833934, "clip_ratio/low_mean": 0.0003724878737330073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008935381174524082, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3635.0, "completions/mean_length": 674.6908569335938, "completions/mean_terminated_length": 604.5501098632812, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 14.289297171186934, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 890763398.0, "reward": 0.4977678656578064, "reward_std": 0.23904915153980255, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 1529 }, { "clip_ratio/high_max": 0.0019644675176095916, "clip_ratio/high_mean": 0.0006172429698381166, "clip_ratio/low_mean": 0.0003118324134447903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009290753714594757, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2008.0, "completions/mean_length": 565.693115234375, "completions/mean_terminated_length": 525.84765625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.298629337999417, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 891301867.0, "reward": 0.6640625, "reward_std": 0.224068284034729, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1530 }, { "clip_ratio/high_max": 0.0016890158858586801, "clip_ratio/high_mean": 0.0005197786613280186, "clip_ratio/low_mean": 0.0003198698020696611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008396484690820216, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 623.0580444335938, "completions/mean_terminated_length": 567.9320068359375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 14.307961504811898, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 891890287.0, "reward": 0.5580357313156128, "reward_std": 0.20061568915843964, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 1531 }, { "clip_ratio/high_max": 0.0018087048829329433, "clip_ratio/high_mean": 0.0005703125470972736, "clip_ratio/low_mean": 0.0003972547054900133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009675672645244049, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 586.3449096679688, "completions/mean_terminated_length": 558.7098388671875, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 14.31729367162438, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0187, "num_tokens": 892480324.0, "reward": 0.520089328289032, "reward_std": 0.218841090798378, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1532 }, { "clip_ratio/high_max": 0.001723562831102754, "clip_ratio/high_mean": 0.000552482175521618, "clip_ratio/low_mean": 0.00030707415839970054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008595563467679312, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3111.0, "completions/mean_length": 598.7779541015625, "completions/mean_terminated_length": 539.2338256835938, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.326625838436861, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 893035453.0, "reward": 0.574776828289032, "reward_std": 0.21271267533302307, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1533 }, { "clip_ratio/high_max": 0.0015161935980358976, "clip_ratio/high_mean": 0.000461618922145135, "clip_ratio/low_mean": 0.00037800682150646026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008396257480853819, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 694.4520263671875, "completions/mean_terminated_length": 600.8314208984375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.335958005249344, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 893647722.0, "reward": 0.5167410969734192, "reward_std": 0.20857815444469452, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 1534 }, { "clip_ratio/high_max": 0.0014743680394531111, "clip_ratio/high_mean": 0.0004271749584177087, "clip_ratio/low_mean": 0.00032111649215949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007482914534193696, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3724.0, "completions/mean_length": 653.700927734375, "completions/mean_terminated_length": 571.085693359375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.345290172061826, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 894238542.0, "reward": 0.5424107313156128, "reward_std": 0.20249292254447937, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1535 }, { "clip_ratio/high_max": 0.001402763675287133, "clip_ratio/high_mean": 0.00044309864460956305, "clip_ratio/low_mean": 0.00038792643181295716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008310250668728258, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 640.7433471679688, "completions/mean_terminated_length": 573.9180908203125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 14.354622338874307, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 894825952.0, "reward": 0.5502232313156128, "reward_std": 0.21447864174842834, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1536 }, { "clip_ratio/high_max": 0.0017578473380126525, "clip_ratio/high_mean": 0.0005143431053511449, "clip_ratio/low_mean": 0.00043021276769650285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009445558580409852, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3419.0, "completions/mean_length": 627.75, "completions/mean_terminated_length": 584.641845703125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.36395450568679, "grad_norm": 0.123046875, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 895442592.0, "reward": 0.4899553656578064, "reward_std": 0.22188794612884521, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1537 }, { "clip_ratio/high_max": 0.0017130379001173424, "clip_ratio/high_mean": 0.000581580497964751, "clip_ratio/low_mean": 0.00029908007115864166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008806605637801113, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3451.0, "completions/mean_length": 632.888427734375, "completions/mean_terminated_length": 569.9227294921875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 14.37328667249927, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0136, "num_tokens": 896024572.0, "reward": 0.5491071939468384, "reward_std": 0.21320053935050964, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1538 }, { "clip_ratio/high_max": 0.0015094607515493408, "clip_ratio/high_mean": 0.00046719284432583663, "clip_ratio/low_mean": 0.0003417732586967759, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008089661032499862, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 604.482177734375, "completions/mean_terminated_length": 573.0270385742188, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 14.382618839311753, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0192, "num_tokens": 896617996.0, "reward": 0.5446428656578064, "reward_std": 0.208957239985466, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1539 }, { "clip_ratio/high_max": 0.0017951807185454527, "clip_ratio/high_mean": 0.0005923280787101248, "clip_ratio/low_mean": 0.0004002520242920582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000992580105048546, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2145.0, "completions/mean_length": 544.2745971679688, "completions/mean_terminated_length": 496.06109619140625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 14.391951006124234, "grad_norm": 0.140625, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 897139114.0, "reward": 0.5859375, "reward_std": 0.2293960303068161, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1540 }, { "clip_ratio/high_max": 0.0014198419776221272, "clip_ratio/high_mean": 0.00038499406605296826, "clip_ratio/low_mean": 0.0003924798751313574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007774739442538703, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 694.1395263671875, "completions/mean_terminated_length": 608.5091552734375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 14.401283172936717, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 897765903.0, "reward": 0.527901828289032, "reward_std": 0.20493358373641968, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1541 }, { "clip_ratio/high_max": 0.0017295615270995768, "clip_ratio/high_mean": 0.0005449113423310337, "clip_ratio/low_mean": 0.0003918287829947076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009367401244162465, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3664.0, "completions/mean_length": 595.9631958007812, "completions/mean_terminated_length": 568.4038696289062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.410615339749198, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0224, "num_tokens": 898351894.0, "reward": 0.5613839626312256, "reward_std": 0.24423283338546753, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1542 }, { "clip_ratio/high_max": 0.001838911666709464, "clip_ratio/high_mean": 0.0005933181932959997, "clip_ratio/low_mean": 0.00037655709672890225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009698753055999987, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3924.0, "completions/mean_length": 631.5647583007812, "completions/mean_terminated_length": 584.5362548828125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 14.41994750656168, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 898950288.0, "reward": 0.59375, "reward_std": 0.21470825374126434, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1543 }, { "clip_ratio/high_max": 0.0012954257008459535, "clip_ratio/high_mean": 0.00034241293542436324, "clip_ratio/low_mean": 0.0003398050796477037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006822180166636826, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 617.8616333007812, "completions/mean_terminated_length": 570.6470947265625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 14.429279673374161, "grad_norm": 0.1171875, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 899556868.0, "reward": 0.551339328289032, "reward_std": 0.1962871253490448, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1544 }, { "clip_ratio/high_max": 0.0013910240759287262, "clip_ratio/high_mean": 0.000438392488831596, "clip_ratio/low_mean": 0.0003174454025156592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007558378993053338, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 675.0267944335938, "completions/mean_terminated_length": 564.6727905273438, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 14.438611840186644, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 900136068.0, "reward": 0.5245535969734192, "reward_std": 0.21297159790992737, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 1545 }, { "clip_ratio/high_max": 0.0017639706875343109, "clip_ratio/high_mean": 0.0005594972026301548, "clip_ratio/low_mean": 0.00042790657562363776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00098740377870854, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 656.6585083007812, "completions/mean_terminated_length": 574.1142578125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.447944006999125, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 900721298.0, "reward": 0.515625, "reward_std": 0.22413566708564758, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1546 }, { "clip_ratio/high_max": 0.001680259183558519, "clip_ratio/high_mean": 0.0004201324322821165, "clip_ratio/low_mean": 0.00038109770162009227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008012301468625083, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 589.318115234375, "completions/mean_terminated_length": 541.716064453125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.457276173811607, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 901294247.0, "reward": 0.5379464626312256, "reward_std": 0.17548204958438873, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 1547 }, { "clip_ratio/high_max": 0.0017964202270377427, "clip_ratio/high_mean": 0.0005530574530894228, "clip_ratio/low_mean": 0.00026572718252282357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000818784626062552, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 622.359375, "completions/mean_terminated_length": 571.2185668945312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 14.466608340624088, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": 0.005, "num_tokens": 901901089.0, "reward": 0.5245535969734192, "reward_std": 0.19384829699993134, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1548 }, { "clip_ratio/high_max": 0.0016368790902561159, "clip_ratio/high_mean": 0.0005065319853656547, "clip_ratio/low_mean": 0.0003591351855902758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008656671734570409, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 638.8303833007812, "completions/mean_terminated_length": 559.8995361328125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 14.47594050743657, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 902472841.0, "reward": 0.5479910969734192, "reward_std": 0.2291657030582428, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1549 }, { "clip_ratio/high_max": 0.0016680218705005245, "clip_ratio/high_mean": 0.0004951860539677, "clip_ratio/low_mean": 0.00037240606957311684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008675921235408168, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 632.2388916015625, "completions/mean_terminated_length": 557.197265625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 14.485272674249051, "grad_norm": 0.12451171875, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 903060583.0, "reward": 0.53125, "reward_std": 0.21188825368881226, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1550 }, { "clip_ratio/high_max": 0.0013455009857352707, "clip_ratio/high_mean": 0.00035143964259987115, "clip_ratio/low_mean": 0.0003274626013762827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006789022273778755, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 586.0078125, "completions/mean_terminated_length": 550.3934326171875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 14.494604841061534, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 903633926.0, "reward": 0.5915178656578064, "reward_std": 0.17983242869377136, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1551 }, { "clip_ratio/high_max": 0.0012904408258691547, "clip_ratio/high_mean": 0.00040748293201886554, "clip_ratio/low_mean": 0.00039662205495005765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008041049754865526, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 601.3795166015625, "completions/mean_terminated_length": 557.9435424804688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.503937007874015, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 904221722.0, "reward": 0.5412946939468384, "reward_std": 0.19283728301525116, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1552 }, { "clip_ratio/high_max": 0.0016895394037419464, "clip_ratio/high_mean": 0.0005341190271792584, "clip_ratio/low_mean": 0.0002519272152312624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007860462460484996, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 631.677490234375, "completions/mean_terminated_length": 560.6549072265625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 14.513269174686497, "grad_norm": 0.1201171875, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 904802641.0, "reward": 0.5814732313156128, "reward_std": 0.1949409544467926, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1553 }, { "clip_ratio/high_max": 0.0016963273910732823, "clip_ratio/high_mean": 0.0005249796301995957, "clip_ratio/low_mean": 0.000345862801509611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008708424311407725, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2056.0, "completions/mean_length": 637.279052734375, "completions/mean_terminated_length": 562.3466186523438, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 14.52260134149898, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 905386283.0, "reward": 0.5290178656578064, "reward_std": 0.22007529437541962, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1554 }, { "clip_ratio/high_max": 0.0011884248078786186, "clip_ratio/high_mean": 0.00032524324228688783, "clip_ratio/low_mean": 0.0002867367578573976, "clip_ratio/low_min": 1.1015156815119553e-05, "clip_ratio/region_mean": 0.0006119799986663566, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2368.0, "completions/mean_length": 638.232177734375, "completions/mean_terminated_length": 591.2941284179688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.531933508311461, "grad_norm": 0.11083984375, "learning_rate": 1e-06, "loss": 0.0141, "num_tokens": 906007515.0, "reward": 0.5558035969734192, "reward_std": 0.17101122438907623, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1555 }, { "clip_ratio/high_max": 0.0018121606326531037, "clip_ratio/high_mean": 0.0005071863884040795, "clip_ratio/low_mean": 0.00033820467319856107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008453910540993093, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 659.630615234375, "completions/mean_terminated_length": 597.151123046875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 14.541265675123944, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0269, "num_tokens": 906628352.0, "reward": 0.5212053656578064, "reward_std": 0.20688749849796295, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1556 }, { "clip_ratio/high_max": 0.001812286400308949, "clip_ratio/high_mean": 0.0005917690667729403, "clip_ratio/low_mean": 0.00045150386313252966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010432729268359253, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2107.0, "completions/mean_length": 606.5346069335938, "completions/mean_terminated_length": 539.0477905273438, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 14.550597841936424, "grad_norm": 0.1552734375, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 907188623.0, "reward": 0.5714285969734192, "reward_std": 0.25025638937950134, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1557 }, { "clip_ratio/high_max": 0.0014588254935006262, "clip_ratio/high_mean": 0.00040958315935313294, "clip_ratio/low_mean": 0.00036816072054079996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007777438922857982, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 562.5580444335938, "completions/mean_terminated_length": 514.5927734375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 14.559930008748907, "grad_norm": 0.12353515625, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 907722931.0, "reward": 0.6171875, "reward_std": 0.18882180750370026, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1558 }, { "clip_ratio/high_max": 0.0015287041123883682, "clip_ratio/high_mean": 0.00048510482167785085, "clip_ratio/low_mean": 0.00028622887361962057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007713337035966106, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 600.9955444335938, "completions/mean_terminated_length": 525.2770385742188, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 14.569262175561388, "grad_norm": 0.1318359375, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 908273959.0, "reward": 0.5569196939468384, "reward_std": 0.19741688668727875, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 1559 }, { "clip_ratio/high_max": 0.001763489262884832, "clip_ratio/high_mean": 0.0005315247715316218, "clip_ratio/low_mean": 0.00033591098451779544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008674357559357304, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3159.0, "completions/mean_length": 652.1194458007812, "completions/mean_terminated_length": 605.3699340820312, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 14.57859434237387, "grad_norm": 0.11865234375, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 908897274.0, "reward": 0.5457589626312256, "reward_std": 0.22405827045440674, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1560 }, { "clip_ratio/high_max": 0.0016889206062842277, "clip_ratio/high_mean": 0.0005480726462110397, "clip_ratio/low_mean": 0.0002917422873451869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000839814947994455, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 605.4933471679688, "completions/mean_terminated_length": 517.6315307617188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 14.587926509186351, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 909437004.0, "reward": 0.5837053656578064, "reward_std": 0.1840072125196457, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1561 }, { "clip_ratio/high_max": 0.0015501676207350101, "clip_ratio/high_mean": 0.00047069335960259195, "clip_ratio/low_mean": 0.00039319797213011043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008638913323011366, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 582.390625, "completions/mean_terminated_length": 534.694580078125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 14.597258675998834, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 910003650.0, "reward": 0.5457589626312256, "reward_std": 0.21857966482639313, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1562 }, { "clip_ratio/high_max": 0.0018689792268560268, "clip_ratio/high_mean": 0.0005871383873454761, "clip_ratio/low_mean": 0.0003853880889437278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009725264917506138, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2656.0, "completions/mean_length": 592.1395263671875, "completions/mean_terminated_length": 540.5537719726562, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.606590842811315, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 910566279.0, "reward": 0.5658482313156128, "reward_std": 0.23889870941638947, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1563 }, { "clip_ratio/high_max": 0.0018714175803324906, "clip_ratio/high_mean": 0.0006118261198935215, "clip_ratio/low_mean": 0.00037246720228267804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000984293320470897, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3159.0, "completions/mean_length": 636.6484375, "completions/mean_terminated_length": 565.7278442382812, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 14.615923009623797, "grad_norm": 0.1357421875, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 911152444.0, "reward": 0.5758928656578064, "reward_std": 0.23022131621837616, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1564 }, { "clip_ratio/high_max": 0.0018047129633487202, "clip_ratio/high_mean": 0.0005651459600812814, "clip_ratio/low_mean": 0.000288212067061977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008533580221410375, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 593.2053833007812, "completions/mean_terminated_length": 545.6561279296875, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 14.625255176436278, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 911723364.0, "reward": 0.5613839626312256, "reward_std": 0.20665885508060455, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1565 }, { "clip_ratio/high_max": 0.0013756183079749462, "clip_ratio/high_mean": 0.00042291028353247384, "clip_ratio/low_mean": 0.00026492827737456537, "clip_ratio/low_min": 1.036484263750026e-05, "clip_ratio/region_mean": 0.000687838563862897, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3333.0, "completions/mean_length": 600.4877319335938, "completions/mean_terminated_length": 557.0407104492188, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 14.63458734324876, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 912301193.0, "reward": 0.5703125, "reward_std": 0.200128972530365, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1566 }, { "clip_ratio/high_max": 0.001985565988434246, "clip_ratio/high_mean": 0.0006315414407254138, "clip_ratio/low_mean": 0.00035548601363188936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009870274561762926, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 560.1663208007812, "completions/mean_terminated_length": 512.1685791015625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 14.643919510061242, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 912850022.0, "reward": 0.5602678656578064, "reward_std": 0.24596740305423737, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1567 }, { "clip_ratio/high_max": 0.0019799693473032676, "clip_ratio/high_mean": 0.0005882592213311, "clip_ratio/low_mean": 0.0004541148209682433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010423740386613645, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 632.4910888671875, "completions/mean_terminated_length": 589.4418334960938, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 14.653251676873724, "grad_norm": 0.1396484375, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 913454286.0, "reward": 0.5569196939468384, "reward_std": 0.23180390894412994, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1568 }, { "clip_ratio/high_max": 0.001248057238626643, "clip_ratio/high_mean": 0.0003339070048014037, "clip_ratio/low_mean": 0.00030964711925207666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006435541272367118, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 661.3092041015625, "completions/mean_terminated_length": 586.8973388671875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 14.662583843686207, "grad_norm": 0.11328125, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 914069971.0, "reward": 0.5234375, "reward_std": 0.19058139622211456, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1569 }, { "clip_ratio/high_max": 0.0012808902492906782, "clip_ratio/high_mean": 0.0003640243446625391, "clip_ratio/low_mean": 0.0003344646167988685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006984889596424182, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 608.2756958007812, "completions/mean_terminated_length": 568.9108276367188, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 14.671916010498688, "grad_norm": 0.115234375, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 914661106.0, "reward": 0.5457589626312256, "reward_std": 0.18160048127174377, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1570 }, { "clip_ratio/high_max": 0.0016296030889861868, "clip_ratio/high_mean": 0.0005215273788508057, "clip_ratio/low_mean": 0.00042272938185305975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000944256765251339, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 631.9330444335938, "completions/mean_terminated_length": 560.9157104492188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 14.68124817731117, "grad_norm": 0.1337890625, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 915245878.0, "reward": 0.5301339626312256, "reward_std": 0.2310122549533844, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 1571 }, { "clip_ratio/high_max": 0.0017332785919279559, "clip_ratio/high_mean": 0.0004814774433725688, "clip_ratio/low_mean": 0.00019448248360731668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006759599291399354, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 568.4498291015625, "completions/mean_terminated_length": 532.6572265625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 14.690580344123651, "grad_norm": 0.11376953125, "learning_rate": 1e-06, "loss": -0.0134, "num_tokens": 915815841.0, "reward": 0.5814732313156128, "reward_std": 0.1716756522655487, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1572 }, { "clip_ratio/high_max": 0.001720041304906772, "clip_ratio/high_mean": 0.00047953514967957744, "clip_ratio/low_mean": 0.0004103460438500406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000889881195689668, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 698.4129638671875, "completions/mean_terminated_length": 632.7030639648438, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 14.699912510936134, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 916463227.0, "reward": 0.4531250298023224, "reward_std": 0.22560739517211914, "rewards/verify_math_reward/mean": 0.453125, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1573 }, { "clip_ratio/high_max": 0.0018501778213249054, "clip_ratio/high_mean": 0.0005637085314447177, "clip_ratio/low_mean": 0.0003098486677117762, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008735572077966935, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3247.0, "completions/mean_length": 580.5435791015625, "completions/mean_terminated_length": 532.8224487304688, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 14.709244677748615, "grad_norm": 0.1259765625, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 917021978.0, "reward": 0.5814732313156128, "reward_std": 0.19512639939785004, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1574 }, { "clip_ratio/high_max": 0.0021636232231685426, "clip_ratio/high_mean": 0.0006965985867282143, "clip_ratio/low_mean": 0.0003783344146768286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010749330049293349, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3958.0, "completions/mean_length": 599.5178833007812, "completions/mean_terminated_length": 548.040771484375, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 14.718576844561097, "grad_norm": 0.138671875, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 917587474.0, "reward": 0.5613839626312256, "reward_std": 0.24277111887931824, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1575 }, { "clip_ratio/high_max": 0.0016982999331958126, "clip_ratio/high_mean": 0.0004861666807300935, "clip_ratio/low_mean": 0.00034483374474802986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008310004268423654, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 595.630615234375, "completions/mean_terminated_length": 568.0686645507812, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 14.727909011373578, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 918199175.0, "reward": 0.5301339626312256, "reward_std": 0.21676772832870483, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1576 }, { "clip_ratio/high_max": 0.0014718273487233091, "clip_ratio/high_mean": 0.00039991727658161835, "clip_ratio/low_mean": 0.0002965557399647878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006964730146137299, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 588.2924194335938, "completions/mean_terminated_length": 544.6937866210938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.73724117818606, "grad_norm": 0.11474609375, "learning_rate": 1e-06, "loss": 0.0169, "num_tokens": 918780557.0, "reward": 0.6037946939468384, "reward_std": 0.20165739953517914, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1577 }, { "clip_ratio/high_max": 0.0016819774118630448, "clip_ratio/high_mean": 0.0004913263937851298, "clip_ratio/low_mean": 0.00047859606638667174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000969922464719275, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 574.7433471679688, "completions/mean_terminated_length": 539.0146484375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 14.746573344998541, "grad_norm": 0.1494140625, "learning_rate": 1e-06, "loss": 0.0145, "num_tokens": 919339215.0, "reward": 0.5491071939468384, "reward_std": 0.25359535217285156, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1578 }, { "clip_ratio/high_max": 0.001475791541452054, "clip_ratio/high_mean": 0.00045782445386066684, "clip_ratio/low_mean": 0.0004203136682008335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008781381284279632, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 575.7545166015625, "completions/mean_terminated_length": 540.0360717773438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.755905511811024, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 919913315.0, "reward": 0.582589328289032, "reward_std": 0.2067016214132309, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1579 }, { "clip_ratio/high_max": 0.0016975590842776, "clip_ratio/high_mean": 0.0005550756031880155, "clip_ratio/low_mean": 0.00040159013474294625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009566657299728831, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 569.7277221679688, "completions/mean_terminated_length": 529.9277954101562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 14.765237678623505, "grad_norm": 0.142578125, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 920473623.0, "reward": 0.6026785969734192, "reward_std": 0.23367930948734283, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1580 }, { "clip_ratio/high_max": 0.0016341464697688934, "clip_ratio/high_mean": 0.0004837096932988061, "clip_ratio/low_mean": 0.0003861727238927415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008698824212842737, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3783.0, "completions/mean_length": 707.7689819335938, "completions/mean_terminated_length": 602.4959716796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.774569845435988, "grad_norm": 0.12060546875, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 921098728.0, "reward": 0.4542410969734192, "reward_std": 0.21353641152381897, "rewards/verify_math_reward/mean": 0.4542410671710968, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 1581 }, { "clip_ratio/high_max": 0.001602337195436121, "clip_ratio/high_mean": 0.0004422149468155112, "clip_ratio/low_mean": 0.0003250001595915819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007672151150472928, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 585.0201416015625, "completions/mean_terminated_length": 561.3505859375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.783902012248468, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 921690698.0, "reward": 0.5524553656578064, "reward_std": 0.173563152551651, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1582 }, { "clip_ratio/high_max": 0.001917310130011174, "clip_ratio/high_mean": 0.0005945376769886934, "clip_ratio/low_mean": 0.0003221652618776716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009167029320451547, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 585.6361694335938, "completions/mean_terminated_length": 525.8683471679688, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 14.793234179060951, "grad_norm": 0.1376953125, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 922238828.0, "reward": 0.598214328289032, "reward_std": 0.20861980319023132, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 1583 }, { "clip_ratio/high_max": 0.0015323374082072405, "clip_ratio/high_mean": 0.00041175250066771696, "clip_ratio/low_mean": 0.0003433801232404221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007551326198154129, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 606.1138916015625, "completions/mean_terminated_length": 546.6947021484375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.802566345873432, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 922806450.0, "reward": 0.5368303656578064, "reward_std": 0.18799060583114624, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1584 }, { "clip_ratio/high_max": 0.0016291528081637807, "clip_ratio/high_mean": 0.0005152303215254506, "clip_ratio/low_mean": 0.00036970703752103873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008849373716657283, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 666.3772583007812, "completions/mean_terminated_length": 604.0204467773438, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 14.811898512685914, "grad_norm": 0.130859375, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 923433980.0, "reward": 0.5368303656578064, "reward_std": 0.22361180186271667, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1585 }, { "clip_ratio/high_max": 0.0017048433710442623, "clip_ratio/high_mean": 0.00052554421813511, "clip_ratio/low_mean": 0.0002599278892603252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000785472115239827, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 619.6484375, "completions/mean_terminated_length": 564.46826171875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 14.821230679498395, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 924024497.0, "reward": 0.5758928656578064, "reward_std": 0.21181045472621918, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1586 }, { "clip_ratio/high_max": 0.0014261988408179604, "clip_ratio/high_mean": 0.0004011380869997083, "clip_ratio/low_mean": 0.000325291187550647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007264292767104052, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 670.779052734375, "completions/mean_terminated_length": 612.4608764648438, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.830562846310878, "grad_norm": 0.111328125, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 924649251.0, "reward": 0.4854910969734192, "reward_std": 0.18457356095314026, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 1587 }, { "clip_ratio/high_max": 0.0016468872136101709, "clip_ratio/high_mean": 0.0005263573989395809, "clip_ratio/low_mean": 0.0003818326463260746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009081900470846449, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 569.9096069335938, "completions/mean_terminated_length": 522.0441284179688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.83989501312336, "grad_norm": 0.1298828125, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 925203282.0, "reward": 0.6071428656578064, "reward_std": 0.20208247005939484, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1588 }, { "clip_ratio/high_max": 0.0017774242696759757, "clip_ratio/high_mean": 0.000528467898220697, "clip_ratio/low_mean": 0.00023485599547257152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007633238951711974, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 690.646240234375, "completions/mean_terminated_length": 596.9208374023438, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 14.849227179935841, "grad_norm": 0.125, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 925812605.0, "reward": 0.5256696939468384, "reward_std": 0.18021291494369507, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 1589 }, { "clip_ratio/high_max": 0.001768240523233544, "clip_ratio/high_mean": 0.0004815827736592837, "clip_ratio/low_mean": 0.0003349495824522819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008165323512230316, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2205.0, "completions/mean_length": 627.7745971679688, "completions/mean_terminated_length": 580.694580078125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 14.858559346748324, "grad_norm": 0.1181640625, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 926418723.0, "reward": 0.5234375, "reward_std": 0.19558288156986237, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1590 }, { "clip_ratio/high_max": 0.0016090367535070982, "clip_ratio/high_mean": 0.0004916613288514782, "clip_ratio/low_mean": 0.00034506639576648013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008367277287106845, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 606.8917846679688, "completions/mean_terminated_length": 555.523193359375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 14.867891513560805, "grad_norm": 0.134765625, "learning_rate": 1e-06, "loss": 0.0171, "num_tokens": 926997466.0, "reward": 0.5691964626312256, "reward_std": 0.22180058062076569, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1591 }, { "clip_ratio/high_max": 0.001990718868910335, "clip_ratio/high_mean": 0.0005809984504594468, "clip_ratio/low_mean": 0.0003284147120439229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009094131546589779, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 558.453125, "completions/mean_terminated_length": 510.4321594238281, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 14.877223680373287, "grad_norm": 0.150390625, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 927536520.0, "reward": 0.6049107313156128, "reward_std": 0.23217158019542694, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1592 }, { "clip_ratio/high_max": 0.0012845343526350916, "clip_ratio/high_mean": 0.00038729559696548677, "clip_ratio/low_mean": 0.00037624197398145043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007635375782228948, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 677.6506958007812, "completions/mean_terminated_length": 579.5350341796875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 14.886555847185768, "grad_norm": 0.1279296875, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 928137527.0, "reward": 0.559151828289032, "reward_std": 0.19516101479530334, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1593 }, { "clip_ratio/high_max": 0.0015147674203035422, "clip_ratio/high_mean": 0.00042596659727678343, "clip_ratio/low_mean": 0.00025001416520353814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006759807629350689, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 580.3861694335938, "completions/mean_terminated_length": 532.6629028320312, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 14.89588801399825, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 928700681.0, "reward": 0.5502232313156128, "reward_std": 0.1609041541814804, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1594 }, { "clip_ratio/high_max": 0.001434171803339268, "clip_ratio/high_mean": 0.0004297477705677011, "clip_ratio/low_mean": 0.0003625786466727732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007923264347482473, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 626.703125, "completions/mean_terminated_length": 563.625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 14.905220180810732, "grad_norm": 0.1240234375, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 929287303.0, "reward": 0.5792410969734192, "reward_std": 0.18359535932540894, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1595 }, { "clip_ratio/high_max": 0.0013942216082796222, "clip_ratio/high_mean": 0.0004281837921098486, "clip_ratio/low_mean": 0.000468025960117302, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008962097595031082, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 619.0692138671875, "completions/mean_terminated_length": 571.87109375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.914552347623214, "grad_norm": 0.12109375, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 929878533.0, "reward": 0.53125, "reward_std": 0.20940369367599487, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1596 }, { "clip_ratio/high_max": 0.0016481798802487901, "clip_ratio/high_mean": 0.00043252003808902373, "clip_ratio/low_mean": 0.00028706260150102025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007195826401584782, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 605.9085083007812, "completions/mean_terminated_length": 526.2260131835938, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 14.923884514435695, "grad_norm": 0.1220703125, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 930421187.0, "reward": 0.6104910969734192, "reward_std": 0.1880647838115692, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1597 }, { "clip_ratio/high_max": 0.001700075298685988, "clip_ratio/high_mean": 0.00046269676079191413, "clip_ratio/low_mean": 0.00032024430538513116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007829410697013373, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 653.2199096679688, "completions/mean_terminated_length": 570.5931396484375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 14.933216681248178, "grad_norm": 0.126953125, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 931004440.0, "reward": 0.546875, "reward_std": 0.20069099962711334, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1598 }, { "clip_ratio/high_max": 0.001689458915279829, "clip_ratio/high_mean": 0.0005262118459086196, "clip_ratio/low_mean": 0.000389939800470529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009161516509266221, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 629.8638916015625, "completions/mean_terminated_length": 566.8431396484375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 14.942548848060659, "grad_norm": 0.13671875, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 931595734.0, "reward": 0.5680803656578064, "reward_std": 0.22202850878238678, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1599 }, { "clip_ratio/high_max": 0.00129362222742202, "clip_ratio/high_mean": 0.0003828839010111551, "clip_ratio/low_mean": 0.0002956844238042322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006785683294765477, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 661.578125, "completions/mean_terminated_length": 611.0147094726562, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 14.951881014873141, "grad_norm": 0.10693359375, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 932235660.0, "reward": 0.4754464626312256, "reward_std": 0.18861931562423706, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 1600 }, { "epoch": 14.951881014873141, "step": 1600, "total_flos": 0.0, "train_loss": 0.002559207767341967, "train_runtime": 118820.1312, "train_samples_per_second": 12.065, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 932235660, "num_train_epochs": 15, "save_steps": 80, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }