{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.73724117818606, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 599.1395263671875, "completions/mean_terminated_length": 531.5096435546875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.0023330417031204435, "grad_norm": 0.17382356524467468, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 553701.0, "reward": 0.5345982313156128, "reward_std": 0.28053027391433716, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1 }, { "clip_ratio/high_max": 0.0025609144286136143, "clip_ratio/high_mean": 0.0012831026178901084, "clip_ratio/low_mean": 0.0007269994894159026, "clip_ratio/low_min": 6.535281045216834e-05, "clip_ratio/region_mean": 0.002010102092754096, "epoch": 0.004666083406240887, "grad_norm": 0.16922008991241455, "learning_rate": 1e-06, "loss": -0.0056, "step": 2 }, { "clip_ratio/high_max": 0.0030311504669953138, "clip_ratio/high_mean": 0.001450449646654306, "clip_ratio/low_mean": 0.0007953367839945713, "clip_ratio/low_min": 9.599875556887127e-05, "clip_ratio/region_mean": 0.002245786425191909, "epoch": 0.00699912510936133, "grad_norm": 0.1380765438079834, "learning_rate": 1e-06, "loss": -0.0057, "step": 3 }, { "clip_ratio/high_max": 0.003355782086146064, "clip_ratio/high_mean": 0.0015627889151801355, "clip_ratio/low_mean": 0.0008980658276414033, "clip_ratio/low_min": 7.3875863563444e-05, "clip_ratio/region_mean": 0.00246085473918356, "epoch": 0.009332166812481774, "grad_norm": 0.13198421895503998, "learning_rate": 1e-06, "loss": -0.0058, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 599.083740234375, "completions/mean_terminated_length": 535.50341796875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.011665208515602217, "grad_norm": 0.13359643518924713, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 1112200.0, "reward": 0.4955357313156128, "reward_std": 0.2189922332763672, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 5 }, { "clip_ratio/high_max": 0.0025913924910128117, "clip_ratio/high_mean": 0.0010766915256681386, "clip_ratio/low_mean": 0.0007588433654746041, "clip_ratio/low_min": 7.571186142740771e-05, "clip_ratio/region_mean": 0.0018355348438490182, "epoch": 0.01399825021872266, "grad_norm": 0.11599036306142807, "learning_rate": 1e-06, "loss": -0.0029, "step": 6 }, { "clip_ratio/high_max": 0.002602317646960728, "clip_ratio/high_mean": 0.0011754830084100831, "clip_ratio/low_mean": 0.0008298801294586156, "clip_ratio/low_min": 6.036487411620328e-05, "clip_ratio/region_mean": 0.002005363130592741, "epoch": 0.016331291921843103, "grad_norm": 0.11496569216251373, "learning_rate": 1e-06, "loss": -0.0029, "step": 7 }, { "clip_ratio/high_max": 0.002375962787482422, "clip_ratio/high_mean": 0.0011225981816096464, "clip_ratio/low_mean": 0.0009531918294669595, "clip_ratio/low_min": 7.924567307782127e-05, "clip_ratio/region_mean": 0.002075790034723468, "epoch": 0.018664333624963548, "grad_norm": 0.11361610889434814, "learning_rate": 1e-06, "loss": -0.003, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3492.0, "completions/mean_length": 588.154052734375, "completions/mean_terminated_length": 544.5537109375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.02099737532808399, "grad_norm": 0.13597965240478516, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 1692674.0, "reward": 0.520089328289032, "reward_std": 0.2241021692752838, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 9 }, { "clip_ratio/high_max": 0.002350593960727565, "clip_ratio/high_mean": 0.0011665630190691445, "clip_ratio/low_mean": 0.0007388781177724013, "clip_ratio/low_min": 5.6859054893720895e-05, "clip_ratio/region_mean": 0.0019054411750403233, "epoch": 0.023330417031204434, "grad_norm": 0.12881502509117126, "learning_rate": 1e-06, "loss": 0.011, "step": 10 }, { "clip_ratio/high_max": 0.002593190991319716, "clip_ratio/high_mean": 0.0012210552631586324, "clip_ratio/low_mean": 0.0010014533309004037, "clip_ratio/low_min": 0.00010162933085666737, "clip_ratio/region_mean": 0.002222508621343877, "epoch": 0.025663458734324875, "grad_norm": 0.1299465298652649, "learning_rate": 1e-06, "loss": 0.0109, "step": 11 }, { "clip_ratio/high_max": 0.002700060489587486, "clip_ratio/high_mean": 0.0013187923905206844, "clip_ratio/low_mean": 0.0011697756344801746, "clip_ratio/low_min": 0.00014083438418310834, "clip_ratio/region_mean": 0.0024885680322768167, "epoch": 0.02799650043744532, "grad_norm": 0.1286158263683319, "learning_rate": 1e-06, "loss": 0.0108, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 577.8092041015625, "completions/mean_terminated_length": 534.0802612304688, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.03032954214056576, "grad_norm": 0.1363631933927536, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 2244767.0, "reward": 0.5792410969734192, "reward_std": 0.19858942925930023, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 13 }, { "clip_ratio/high_max": 0.002584369220130611, "clip_ratio/high_mean": 0.0009461219378863461, "clip_ratio/low_mean": 0.0005696465343589807, "clip_ratio/low_min": 1.152923778136028e-05, "clip_ratio/region_mean": 0.0015157684611040168, "epoch": 0.032662583843686206, "grad_norm": 0.12548068165779114, "learning_rate": 1e-06, "loss": -0.0041, "step": 14 }, { "clip_ratio/high_max": 0.0028011447975586634, "clip_ratio/high_mean": 0.0010542050367803313, "clip_ratio/low_mean": 0.0006140220211818814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016682270652381703, "epoch": 0.03499562554680665, "grad_norm": 0.12155156582593918, "learning_rate": 1e-06, "loss": -0.0042, "step": 15 }, { "clip_ratio/high_max": 0.002752818356384523, "clip_ratio/high_mean": 0.001091941143386066, "clip_ratio/low_mean": 0.0006843827104603406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017763238502084278, "epoch": 0.037328667249927096, "grad_norm": 0.1183493584394455, "learning_rate": 1e-06, "loss": -0.0043, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 591.40625, "completions/mean_terminated_length": 551.8510131835938, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.039661708953047534, "grad_norm": 0.13591092824935913, "learning_rate": 1e-06, "loss": 0.0206, "num_tokens": 2817275.0, "reward": 0.5491071939468384, "reward_std": 0.2336895763874054, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 17 }, { "clip_ratio/high_max": 0.002568566858826671, "clip_ratio/high_mean": 0.001051692681357963, "clip_ratio/low_mean": 0.0006753919278708054, "clip_ratio/low_min": 4.836162861465709e-05, "clip_ratio/region_mean": 0.0017270846146857366, "epoch": 0.04199475065616798, "grad_norm": 0.13091467320919037, "learning_rate": 1e-06, "loss": 0.0207, "step": 18 }, { "clip_ratio/high_max": 0.002690263205295196, "clip_ratio/high_mean": 0.0011286328535788925, "clip_ratio/low_mean": 0.0008892396599549102, "clip_ratio/low_min": 7.194422960310476e-05, "clip_ratio/region_mean": 0.0020178725026198663, "epoch": 0.04432779235928842, "grad_norm": 0.12827104330062866, "learning_rate": 1e-06, "loss": 0.0205, "step": 19 }, { "clip_ratio/high_max": 0.002711487060878426, "clip_ratio/high_mean": 0.0011862955725518987, "clip_ratio/low_mean": 0.0009825384677242255, "clip_ratio/low_min": 7.760569678794127e-05, "clip_ratio/region_mean": 0.002168834034819156, "epoch": 0.04666083406240887, "grad_norm": 0.1250333935022354, "learning_rate": 1e-06, "loss": 0.0204, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 597.328125, "completions/mean_terminated_length": 561.82861328125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.048993875765529306, "grad_norm": 0.13305212557315826, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 3406041.0, "reward": 0.5569196939468384, "reward_std": 0.23521842062473297, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 21 }, { "clip_ratio/high_max": 0.0025573696475476027, "clip_ratio/high_mean": 0.001075486849003937, "clip_ratio/low_mean": 0.0007244412427098723, "clip_ratio/low_min": 4.8875235734158196e-05, "clip_ratio/region_mean": 0.0017999280971707776, "epoch": 0.05132691746864975, "grad_norm": 0.12963628768920898, "learning_rate": 1e-06, "loss": 0.0113, "step": 22 }, { "clip_ratio/high_max": 0.002402301055553835, "clip_ratio/high_mean": 0.0010440900186949875, "clip_ratio/low_mean": 0.0007428098979289643, "clip_ratio/low_min": 1.6009220416890457e-05, "clip_ratio/region_mean": 0.001786899912985973, "epoch": 0.053659959171770195, "grad_norm": 0.12822706997394562, "learning_rate": 1e-06, "loss": 0.0112, "step": 23 }, { "clip_ratio/high_max": 0.0029438420169753954, "clip_ratio/high_mean": 0.0012320683017605916, "clip_ratio/low_mean": 0.0010378825863881502, "clip_ratio/low_min": 0.00010859549092856469, "clip_ratio/region_mean": 0.0022699508917867206, "epoch": 0.05599300087489064, "grad_norm": 0.12256379425525665, "learning_rate": 1e-06, "loss": 0.011, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3524.0, "completions/mean_length": 557.4074096679688, "completions/mean_terminated_length": 517.4683837890625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.058326042578011085, "grad_norm": 0.12277572602033615, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 3962974.0, "reward": 0.590401828289032, "reward_std": 0.1910264939069748, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 25 }, { "clip_ratio/high_max": 0.002548000389651861, "clip_ratio/high_mean": 0.0009133048224612139, "clip_ratio/low_mean": 0.0005774957135145087, "clip_ratio/low_min": 1.4692054719489533e-05, "clip_ratio/region_mean": 0.0014908005541656166, "epoch": 0.06065908428113152, "grad_norm": 0.11695949733257294, "learning_rate": 1e-06, "loss": 0.0067, "step": 26 }, { "clip_ratio/high_max": 0.0025447930820519105, "clip_ratio/high_mean": 0.0010029514705820475, "clip_ratio/low_mean": 0.0006416668966267025, "clip_ratio/low_min": 4.5577687160403e-05, "clip_ratio/region_mean": 0.0016446183799416758, "epoch": 0.06299212598425197, "grad_norm": 0.11669906228780746, "learning_rate": 1e-06, "loss": 0.0066, "step": 27 }, { "clip_ratio/high_max": 0.0029656054903171025, "clip_ratio/high_mean": 0.001066582088242285, "clip_ratio/low_mean": 0.0007978226713021286, "clip_ratio/low_min": 4.5577687160403e-05, "clip_ratio/region_mean": 0.0018644047595444135, "epoch": 0.06532516768737241, "grad_norm": 0.11243632435798645, "learning_rate": 1e-06, "loss": 0.0064, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 631.7199096679688, "completions/mean_terminated_length": 580.7168579101562, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.06765820939049286, "grad_norm": 0.12044700980186462, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 4550139.0, "reward": 0.5647321939468384, "reward_std": 0.20309659838676453, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 29 }, { "clip_ratio/high_max": 0.0021035382014815696, "clip_ratio/high_mean": 0.0009388953476445749, "clip_ratio/low_mean": 0.0005977314849587856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015366268416983075, "epoch": 0.0699912510936133, "grad_norm": 0.11372141540050507, "learning_rate": 1e-06, "loss": 0.0013, "step": 30 }, { "clip_ratio/high_max": 0.0022959384114074055, "clip_ratio/high_mean": 0.0010049418779090047, "clip_ratio/low_mean": 0.0006751287419319851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016800706107460428, "epoch": 0.07232429279673375, "grad_norm": 0.11178834736347198, "learning_rate": 1e-06, "loss": 0.0011, "step": 31 }, { "clip_ratio/high_max": 0.0024577917793067172, "clip_ratio/high_mean": 0.001125254395446973, "clip_ratio/low_mean": 0.0008153450526151573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019405994535190985, "epoch": 0.07465733449985419, "grad_norm": 0.10968935489654541, "learning_rate": 1e-06, "loss": 0.001, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3437.0, "completions/mean_length": 671.9520263671875, "completions/mean_terminated_length": 561.4988403320312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.07699037620297462, "grad_norm": 0.13099364936351776, "learning_rate": 1e-06, "loss": 0.009, "num_tokens": 5126496.0, "reward": 0.546875, "reward_std": 0.21860219538211823, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 33 }, { "clip_ratio/high_max": 0.002361340018978808, "clip_ratio/high_mean": 0.00098375396191841, "clip_ratio/low_mean": 0.0006880051660118625, "clip_ratio/low_min": 4.6551635932701174e-05, "clip_ratio/region_mean": 0.0016717591497581452, "epoch": 0.07932341790609507, "grad_norm": 0.12682709097862244, "learning_rate": 1e-06, "loss": 0.0091, "step": 34 }, { "clip_ratio/high_max": 0.002584736888820771, "clip_ratio/high_mean": 0.001108783901145216, "clip_ratio/low_mean": 0.0008017359123186907, "clip_ratio/low_min": 5.207386584515916e-05, "clip_ratio/region_mean": 0.0019105197898170445, "epoch": 0.08165645960921551, "grad_norm": 0.12357613444328308, "learning_rate": 1e-06, "loss": 0.0089, "step": 35 }, { "clip_ratio/high_max": 0.002877717837691307, "clip_ratio/high_mean": 0.0012224330275785178, "clip_ratio/low_mean": 0.0009173835496767424, "clip_ratio/low_min": 0.00010644804569892585, "clip_ratio/region_mean": 0.002139816584531218, "epoch": 0.08398950131233596, "grad_norm": 0.11715061962604523, "learning_rate": 1e-06, "loss": 0.0088, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 630.1473388671875, "completions/mean_terminated_length": 567.1317749023438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.0863225430154564, "grad_norm": 0.12196908891201019, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 5720812.0, "reward": 0.5022321939468384, "reward_std": 0.22015085816383362, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 37 }, { "clip_ratio/high_max": 0.002276025843457319, "clip_ratio/high_mean": 0.0009157403219433036, "clip_ratio/low_mean": 0.0007647342172276694, "clip_ratio/low_min": 7.142465256038122e-05, "clip_ratio/region_mean": 0.0016804745537228882, "epoch": 0.08865558471857685, "grad_norm": 0.11864572018384933, "learning_rate": 1e-06, "loss": -0.0024, "step": 38 }, { "clip_ratio/high_max": 0.0023989728579181246, "clip_ratio/high_mean": 0.0009364187317260075, "clip_ratio/low_mean": 0.0008477529045194387, "clip_ratio/low_min": 7.700277819822077e-05, "clip_ratio/region_mean": 0.0017841715962276794, "epoch": 0.09098862642169729, "grad_norm": 0.11392717808485031, "learning_rate": 1e-06, "loss": -0.0025, "step": 39 }, { "clip_ratio/high_max": 0.0025696083685033955, "clip_ratio/high_mean": 0.001035327139106812, "clip_ratio/low_mean": 0.0010244471923215315, "clip_ratio/low_min": 9.850909191300161e-05, "clip_ratio/region_mean": 0.002059774298686534, "epoch": 0.09332166812481774, "grad_norm": 0.11024210602045059, "learning_rate": 1e-06, "loss": -0.0026, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3335.0, "completions/mean_length": 644.5670166015625, "completions/mean_terminated_length": 581.8135986328125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.09565470982793818, "grad_norm": 0.10723958909511566, "learning_rate": 1e-06, "loss": 0.0167, "num_tokens": 6317152.0, "reward": 0.5133928656578064, "reward_std": 0.14391589164733887, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 41 }, { "clip_ratio/high_max": 0.0018419230764266104, "clip_ratio/high_mean": 0.0007109696744009852, "clip_ratio/low_mean": 0.0003871322642226005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010981019368045963, "epoch": 0.09798775153105861, "grad_norm": 0.10396261513233185, "learning_rate": 1e-06, "loss": 0.0167, "step": 42 }, { "clip_ratio/high_max": 0.0021622480162477586, "clip_ratio/high_mean": 0.0007905355168986716, "clip_ratio/low_mean": 0.0004530861242528772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012436216456990223, "epoch": 0.10032079323417906, "grad_norm": 0.10050157457590103, "learning_rate": 1e-06, "loss": 0.0166, "step": 43 }, { "clip_ratio/high_max": 0.002194111566495849, "clip_ratio/high_mean": 0.0008102206265903078, "clip_ratio/low_mean": 0.0005761826105299406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013864032407582272, "epoch": 0.1026538349372995, "grad_norm": 0.09578145295381546, "learning_rate": 1e-06, "loss": 0.0164, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 648.09375, "completions/mean_terminated_length": 540.9666137695312, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.10498687664041995, "grad_norm": 0.12563996016979218, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 6889732.0, "reward": 0.5569196939468384, "reward_std": 0.16311657428741455, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 45 }, { "clip_ratio/high_max": 0.0026375235320301726, "clip_ratio/high_mean": 0.0008978002188086975, "clip_ratio/low_mean": 0.0006766468613932375, "clip_ratio/low_min": 9.575608601153363e-06, "clip_ratio/region_mean": 0.0015744471093057655, "epoch": 0.10731991834354039, "grad_norm": 0.11787506937980652, "learning_rate": 1e-06, "loss": -0.0021, "step": 46 }, { "clip_ratio/high_max": 0.0025555259562679566, "clip_ratio/high_mean": 0.0008320110337081132, "clip_ratio/low_mean": 0.0007191349095592159, "clip_ratio/low_min": 1.68964579643216e-05, "clip_ratio/region_mean": 0.0015511459459958132, "epoch": 0.10965296004666084, "grad_norm": 0.1151903048157692, "learning_rate": 1e-06, "loss": -0.0023, "step": 47 }, { "clip_ratio/high_max": 0.002599589031888172, "clip_ratio/high_mean": 0.0009456603711441858, "clip_ratio/low_mean": 0.0008999099036373082, "clip_ratio/low_min": 5.10406734974822e-05, "clip_ratio/region_mean": 0.0018455702447681688, "epoch": 0.11198600174978128, "grad_norm": 0.10982903093099594, "learning_rate": 1e-06, "loss": -0.0024, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 645.6730346679688, "completions/mean_terminated_length": 598.8359985351562, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.11431904345290173, "grad_norm": 0.12311972677707672, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 7507095.0, "reward": 0.5636160969734192, "reward_std": 0.21196311712265015, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 49 }, { "clip_ratio/high_max": 0.002299583655258175, "clip_ratio/high_mean": 0.0009062394237844273, "clip_ratio/low_mean": 0.0006187675317050889, "clip_ratio/low_min": 1.3067112377029844e-05, "clip_ratio/region_mean": 0.0015250069700414315, "epoch": 0.11665208515602217, "grad_norm": 0.11594700813293457, "learning_rate": 1e-06, "loss": 0.0122, "step": 50 }, { "clip_ratio/high_max": 0.0024076446061371826, "clip_ratio/high_mean": 0.0010336171981180087, "clip_ratio/low_mean": 0.000668985388983856, "clip_ratio/low_min": 2.8923385798407253e-05, "clip_ratio/region_mean": 0.0017026025670929812, "epoch": 0.1189851268591426, "grad_norm": 0.11414211243391037, "learning_rate": 1e-06, "loss": 0.0121, "step": 51 }, { "clip_ratio/high_max": 0.0028142996670794673, "clip_ratio/high_mean": 0.001169845290860394, "clip_ratio/low_mean": 0.0008582515183661599, "clip_ratio/low_min": 7.0213213803072e-05, "clip_ratio/region_mean": 0.00202809688198613, "epoch": 0.12131816856226305, "grad_norm": 0.1078699603676796, "learning_rate": 1e-06, "loss": 0.0119, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3575.0, "completions/mean_length": 590.1842041015625, "completions/mean_terminated_length": 546.6090698242188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.12365121026538349, "grad_norm": 0.12721331417560577, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 8086956.0, "reward": 0.5535714626312256, "reward_std": 0.200234517455101, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994493484497, "step": 53 }, { "clip_ratio/high_max": 0.0025019785534823313, "clip_ratio/high_mean": 0.0009520767889625859, "clip_ratio/low_mean": 0.0006159243412184878, "clip_ratio/low_min": 2.973258506244747e-05, "clip_ratio/region_mean": 0.0015680011347285472, "epoch": 0.12598425196850394, "grad_norm": 0.12222371995449066, "learning_rate": 1e-06, "loss": 0.0042, "step": 54 }, { "clip_ratio/high_max": 0.0024724491013330407, "clip_ratio/high_mean": 0.000984601605523494, "clip_ratio/low_mean": 0.0007224220125863212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017070236172003206, "epoch": 0.1283172936716244, "grad_norm": 0.11799755692481995, "learning_rate": 1e-06, "loss": 0.0041, "step": 55 }, { "clip_ratio/high_max": 0.0027482093282742426, "clip_ratio/high_mean": 0.0010388134269305738, "clip_ratio/low_mean": 0.0008494130725011928, "clip_ratio/low_min": 5.6785106608003844e-05, "clip_ratio/region_mean": 0.0018882264703279361, "epoch": 0.13065033537474482, "grad_norm": 0.11242830008268356, "learning_rate": 1e-06, "loss": 0.0039, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 629.5736694335938, "completions/mean_terminated_length": 586.4881591796875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.13298337707786526, "grad_norm": 0.11814715713262558, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 8699462.0, "reward": 0.5725446939468384, "reward_std": 0.19328376650810242, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 57 }, { "clip_ratio/high_max": 0.0020316590926086064, "clip_ratio/high_mean": 0.0008365630928892642, "clip_ratio/low_mean": 0.0005969938374619232, "clip_ratio/low_min": 1.750700357661117e-05, "clip_ratio/region_mean": 0.0014335569394461345, "epoch": 0.13531641878098571, "grad_norm": 0.11250767856836319, "learning_rate": 1e-06, "loss": 0.0075, "step": 58 }, { "clip_ratio/high_max": 0.002291575281560654, "clip_ratio/high_mean": 0.0009231151416315697, "clip_ratio/low_mean": 0.0007549819310952444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016780970909167081, "epoch": 0.13764946048410615, "grad_norm": 0.107053242623806, "learning_rate": 1e-06, "loss": 0.0074, "step": 59 }, { "clip_ratio/high_max": 0.0023772748027113266, "clip_ratio/high_mean": 0.0010050107612187276, "clip_ratio/low_mean": 0.0009433678860659711, "clip_ratio/low_min": 1.930203870870173e-05, "clip_ratio/region_mean": 0.0019483785872580484, "epoch": 0.1399825021872266, "grad_norm": 0.1032634973526001, "learning_rate": 1e-06, "loss": 0.0072, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 668.349365234375, "completions/mean_terminated_length": 561.8515625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.14231554389034703, "grad_norm": 0.1245390996336937, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 9279175.0, "reward": 0.5792410969734192, "reward_std": 0.19094978272914886, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 61 }, { "clip_ratio/high_max": 0.002226275155408075, "clip_ratio/high_mean": 0.0008956184683484025, "clip_ratio/low_mean": 0.0007161258799897041, "clip_ratio/low_min": 1.0660071893653367e-05, "clip_ratio/region_mean": 0.0016117443519760855, "epoch": 0.1446485855934675, "grad_norm": 0.12006866931915283, "learning_rate": 1e-06, "loss": 0.0081, "step": 62 }, { "clip_ratio/high_max": 0.0022636133653577417, "clip_ratio/high_mean": 0.0009492589269939344, "clip_ratio/low_mean": 0.0007973573538038181, "clip_ratio/low_min": 5.425930430646986e-05, "clip_ratio/region_mean": 0.0017466162898926996, "epoch": 0.14698162729658792, "grad_norm": 0.1166459321975708, "learning_rate": 1e-06, "loss": 0.0079, "step": 63 }, { "clip_ratio/high_max": 0.002635468394146301, "clip_ratio/high_mean": 0.0011539265706232982, "clip_ratio/low_mean": 0.0010054489321191795, "clip_ratio/low_min": 2.7662247703119647e-05, "clip_ratio/region_mean": 0.002159375486371573, "epoch": 0.14931466899970838, "grad_norm": 0.1114407554268837, "learning_rate": 1e-06, "loss": 0.0078, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2740.0, "completions/mean_length": 616.849365234375, "completions/mean_terminated_length": 565.6273803710938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.15164771070282881, "grad_norm": 0.13642925024032593, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 9864248.0, "reward": 0.5725446939468384, "reward_std": 0.2336365431547165, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 65 }, { "clip_ratio/high_max": 0.0023575330451421905, "clip_ratio/high_mean": 0.0009699675883894088, "clip_ratio/low_mean": 0.0008386670797335682, "clip_ratio/low_min": 7.830562481103698e-05, "clip_ratio/region_mean": 0.0018086346608470194, "epoch": 0.15398075240594924, "grad_norm": 0.15203692018985748, "learning_rate": 1e-06, "loss": 0.0063, "step": 66 }, { "clip_ratio/high_max": 0.0025920921616489068, "clip_ratio/high_mean": 0.0010163355782424333, "clip_ratio/low_mean": 0.0009887217292998685, "clip_ratio/low_min": 0.00010267821653542342, "clip_ratio/region_mean": 0.0020050572784384713, "epoch": 0.1563137941090697, "grad_norm": 0.1244925856590271, "learning_rate": 1e-06, "loss": 0.0062, "step": 67 }, { "clip_ratio/high_max": 0.0031369516655104235, "clip_ratio/high_mean": 0.0012011902363155968, "clip_ratio/low_mean": 0.0011840397492051125, "clip_ratio/low_min": 0.00015028488360258052, "clip_ratio/region_mean": 0.0023852300219004974, "epoch": 0.15864683581219013, "grad_norm": 0.11972557753324509, "learning_rate": 1e-06, "loss": 0.0059, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 688.8136596679688, "completions/mean_terminated_length": 586.9896240234375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1609798775153106, "grad_norm": 0.12308245152235031, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 10456161.0, "reward": 0.5446428656578064, "reward_std": 0.19772039353847504, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 69 }, { "clip_ratio/high_max": 0.001885887460957747, "clip_ratio/high_mean": 0.0007849838602851378, "clip_ratio/low_mean": 0.0005411317088146461, "clip_ratio/low_min": 8.458519005216658e-06, "clip_ratio/region_mean": 0.0013261156018415932, "epoch": 0.16331291921843102, "grad_norm": 0.11639565974473953, "learning_rate": 1e-06, "loss": -0.001, "step": 70 }, { "clip_ratio/high_max": 0.002387862179602962, "clip_ratio/high_mean": 0.0010235370718874037, "clip_ratio/low_mean": 0.0006843969658802962, "clip_ratio/low_min": 8.458519005216658e-06, "clip_ratio/region_mean": 0.001707934025034774, "epoch": 0.16564596092155148, "grad_norm": 0.11047334223985672, "learning_rate": 1e-06, "loss": -0.0012, "step": 71 }, { "clip_ratio/high_max": 0.0025295156738138758, "clip_ratio/high_mean": 0.0010514781570236664, "clip_ratio/low_mean": 0.0008469059102935717, "clip_ratio/low_min": 2.7537083951756358e-05, "clip_ratio/region_mean": 0.0018983840564033017, "epoch": 0.1679790026246719, "grad_norm": 0.10466562211513519, "learning_rate": 1e-06, "loss": -0.0014, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3097.0, "completions/mean_length": 644.6038208007812, "completions/mean_terminated_length": 557.7265014648438, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.17031204432779237, "grad_norm": 0.14219598472118378, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 11038070.0, "reward": 0.640625, "reward_std": 0.2386026829481125, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 73 }, { "clip_ratio/high_max": 0.002510826707293745, "clip_ratio/high_mean": 0.001187145317089744, "clip_ratio/low_mean": 0.0007252296836668393, "clip_ratio/low_min": 2.792806026263861e-05, "clip_ratio/region_mean": 0.0019123750171274878, "epoch": 0.1726450860309128, "grad_norm": 0.1322600543498993, "learning_rate": 1e-06, "loss": -0.0195, "step": 74 }, { "clip_ratio/high_max": 0.002839380715158768, "clip_ratio/high_mean": 0.0013101468794047832, "clip_ratio/low_mean": 0.0008481293607474072, "clip_ratio/low_min": 2.6692290703067556e-05, "clip_ratio/region_mean": 0.0021582761910394765, "epoch": 0.17497812773403323, "grad_norm": 0.12900695204734802, "learning_rate": 1e-06, "loss": -0.0196, "step": 75 }, { "clip_ratio/high_max": 0.003310332729597576, "clip_ratio/high_mean": 0.0014637492677138653, "clip_ratio/low_mean": 0.0010298111446900293, "clip_ratio/low_min": 8.216709466069005e-05, "clip_ratio/region_mean": 0.002493560328730382, "epoch": 0.1773111694371537, "grad_norm": 0.12132413685321808, "learning_rate": 1e-06, "loss": -0.0199, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 563.390625, "completions/mean_terminated_length": 519.4824829101562, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.17964421114027412, "grad_norm": 0.1358417123556137, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 11588980.0, "reward": 0.6674107313156128, "reward_std": 0.1969301551580429, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 77 }, { "clip_ratio/high_max": 0.002246248383016791, "clip_ratio/high_mean": 0.0010571951315796468, "clip_ratio/low_mean": 0.0007074121949699474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017646073247306049, "epoch": 0.18197725284339458, "grad_norm": 0.13148048520088196, "learning_rate": 1e-06, "loss": 0.0005, "step": 78 }, { "clip_ratio/high_max": 0.002653501018357929, "clip_ratio/high_mean": 0.0012056281484547071, "clip_ratio/low_mean": 0.0007674205699004233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001973048747458961, "epoch": 0.184310294546515, "grad_norm": 0.12555159628391266, "learning_rate": 1e-06, "loss": 0.0003, "step": 79 }, { "clip_ratio/high_max": 0.0027306380434310995, "clip_ratio/high_mean": 0.001287678038352169, "clip_ratio/low_mean": 0.001008126371743856, "clip_ratio/low_min": 3.465483678155579e-05, "clip_ratio/region_mean": 0.002295804355526343, "epoch": 0.18664333624963547, "grad_norm": 0.12070609629154205, "learning_rate": 1e-06, "loss": 0.0, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 605.34375, "completions/mean_terminated_length": 529.719482421875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.1889763779527559, "grad_norm": 0.12303871661424637, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 12135736.0, "reward": 0.6026785969734192, "reward_std": 0.15120504796504974, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 81 }, { "clip_ratio/high_max": 0.002153479154003435, "clip_ratio/high_mean": 0.0007754087901048479, "clip_ratio/low_mean": 0.00042436131116119213, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011997700712527148, "epoch": 0.19130941965587636, "grad_norm": 0.11511857062578201, "learning_rate": 1e-06, "loss": -0.01, "step": 82 }, { "clip_ratio/high_max": 0.0023171911198005546, "clip_ratio/high_mean": 0.0008453816790279234, "clip_ratio/low_mean": 0.0005161670669622254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013615487441711593, "epoch": 0.1936424613589968, "grad_norm": 0.1138274073600769, "learning_rate": 1e-06, "loss": -0.0102, "step": 83 }, { "clip_ratio/high_max": 0.003078581838053651, "clip_ratio/high_mean": 0.0010828716767719015, "clip_ratio/low_mean": 0.0007412048998958198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018240765421069227, "epoch": 0.19597550306211722, "grad_norm": 0.1063850075006485, "learning_rate": 1e-06, "loss": -0.0104, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 656.4074096679688, "completions/mean_terminated_length": 593.8693237304688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.19830854476523768, "grad_norm": 0.11717595160007477, "learning_rate": 1e-06, "loss": 0.0159, "num_tokens": 12753773.0, "reward": 0.5189732313156128, "reward_std": 0.20298078656196594, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 85 }, { "clip_ratio/high_max": 0.0019150459083903115, "clip_ratio/high_mean": 0.0008873847637005383, "clip_ratio/low_mean": 0.0006112498958827928, "clip_ratio/low_min": 1.3598781151813455e-05, "clip_ratio/region_mean": 0.0014986347014200874, "epoch": 0.2006415864683581, "grad_norm": 0.10815272480249405, "learning_rate": 1e-06, "loss": 0.0159, "step": 86 }, { "clip_ratio/high_max": 0.0022102415750850923, "clip_ratio/high_mean": 0.001022938682581298, "clip_ratio/low_mean": 0.0006917814953339985, "clip_ratio/low_min": 4.954419182467973e-05, "clip_ratio/region_mean": 0.0017147201506304555, "epoch": 0.20297462817147857, "grad_norm": 0.10520819574594498, "learning_rate": 1e-06, "loss": 0.0157, "step": 87 }, { "clip_ratio/high_max": 0.0025819114307523705, "clip_ratio/high_mean": 0.0011637151474133134, "clip_ratio/low_mean": 0.0008546071730961557, "clip_ratio/low_min": 6.731956182193244e-05, "clip_ratio/region_mean": 0.0020183223095955327, "epoch": 0.205307669874599, "grad_norm": 0.10069582611322403, "learning_rate": 1e-06, "loss": 0.0156, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 646.3605346679688, "completions/mean_terminated_length": 567.6015625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.20764071157771946, "grad_norm": 0.12426190078258514, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 13335328.0, "reward": 0.6473214626312256, "reward_std": 0.16972355544567108, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 89 }, { "clip_ratio/high_max": 0.002509818885300774, "clip_ratio/high_mean": 0.0009599238892405992, "clip_ratio/low_mean": 0.0004967362110619433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014566600766556803, "epoch": 0.2099737532808399, "grad_norm": 0.11910506337881088, "learning_rate": 1e-06, "loss": -0.0106, "step": 90 }, { "clip_ratio/high_max": 0.0026411540602566674, "clip_ratio/high_mean": 0.0010548686313995859, "clip_ratio/low_mean": 0.0006366391753545031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016915078012971207, "epoch": 0.21230679498396035, "grad_norm": 0.1120922639966011, "learning_rate": 1e-06, "loss": -0.0108, "step": 91 }, { "clip_ratio/high_max": 0.0030780687666265294, "clip_ratio/high_mean": 0.001172436432170798, "clip_ratio/low_mean": 0.0008386353092646459, "clip_ratio/low_min": 3.645732067525387e-05, "clip_ratio/region_mean": 0.0020110717887291685, "epoch": 0.21463983668708078, "grad_norm": 0.10448692739009857, "learning_rate": 1e-06, "loss": -0.011, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 653.9654541015625, "completions/mean_terminated_length": 587.3958740234375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.2169728783902012, "grad_norm": 0.13105586171150208, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 13935945.0, "reward": 0.527901828289032, "reward_std": 0.2027878612279892, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 93 }, { "clip_ratio/high_max": 0.0024171297263819724, "clip_ratio/high_mean": 0.0008566286196582951, "clip_ratio/low_mean": 0.0005390885689848801, "clip_ratio/low_min": 2.592824057501275e-05, "clip_ratio/region_mean": 0.0013957171686342917, "epoch": 0.21930592009332167, "grad_norm": 0.12656445801258087, "learning_rate": 1e-06, "loss": 0.0066, "step": 94 }, { "clip_ratio/high_max": 0.003062793315621093, "clip_ratio/high_mean": 0.0010422228115203325, "clip_ratio/low_mean": 0.0006456348837673431, "clip_ratio/low_min": 1.549714943394065e-05, "clip_ratio/region_mean": 0.0016878577080206014, "epoch": 0.2216389617964421, "grad_norm": 0.12080687284469604, "learning_rate": 1e-06, "loss": 0.0064, "step": 95 }, { "clip_ratio/high_max": 0.0033086536641349085, "clip_ratio/high_mean": 0.0011667379476421047, "clip_ratio/low_mean": 0.0007989465611899504, "clip_ratio/low_min": 1.5082046047609765e-05, "clip_ratio/region_mean": 0.0019656845688587055, "epoch": 0.22397200349956256, "grad_norm": 0.11328689008951187, "learning_rate": 1e-06, "loss": 0.0062, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 676.9810791015625, "completions/mean_terminated_length": 590.9187622070312, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.226305045202683, "grad_norm": 0.12749934196472168, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 14537640.0, "reward": 0.5636160969734192, "reward_std": 0.19403307139873505, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 97 }, { "clip_ratio/high_max": 0.002169629053241806, "clip_ratio/high_mean": 0.0008261291204689769, "clip_ratio/low_mean": 0.0006823650764999911, "clip_ratio/low_min": 4.0115533920470625e-05, "clip_ratio/region_mean": 0.001508494224253809, "epoch": 0.22863808690580345, "grad_norm": 0.11909526586532593, "learning_rate": 1e-06, "loss": 0.0026, "step": 98 }, { "clip_ratio/high_max": 0.0022699568289681338, "clip_ratio/high_mean": 0.0009342714256490581, "clip_ratio/low_mean": 0.0007688670775678474, "clip_ratio/low_min": 3.474796176305972e-05, "clip_ratio/region_mean": 0.001703138532320736, "epoch": 0.23097112860892388, "grad_norm": 0.112027607858181, "learning_rate": 1e-06, "loss": 0.0024, "step": 99 }, { "clip_ratio/high_max": 0.002555411745561287, "clip_ratio/high_mean": 0.0010414771641080733, "clip_ratio/low_mean": 0.0010076383150590118, "clip_ratio/low_min": 4.980094854545314e-05, "clip_ratio/region_mean": 0.002049115522822831, "epoch": 0.23330417031204434, "grad_norm": 0.10489960759878159, "learning_rate": 1e-06, "loss": 0.0021, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 628.59375, "completions/mean_terminated_length": 565.5499877929688, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.23563721201516477, "grad_norm": 0.14488396048545837, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 15122796.0, "reward": 0.5345982313156128, "reward_std": 0.21665894985198975, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 101 }, { "clip_ratio/high_max": 0.0022061730924178846, "clip_ratio/high_mean": 0.0009643534776841989, "clip_ratio/low_mean": 0.0007172951954999007, "clip_ratio/low_min": 1.4227179235604126e-05, "clip_ratio/region_mean": 0.0016816486786410678, "epoch": 0.2379702537182852, "grad_norm": 0.13462558388710022, "learning_rate": 1e-06, "loss": 0.001, "step": 102 }, { "clip_ratio/high_max": 0.0023603892914252356, "clip_ratio/high_mean": 0.0010227733255305793, "clip_ratio/low_mean": 0.0008960159102571197, "clip_ratio/low_min": 6.526774632220622e-05, "clip_ratio/region_mean": 0.001918789272167487, "epoch": 0.24030329542140566, "grad_norm": 0.12845584750175476, "learning_rate": 1e-06, "loss": 0.0007, "step": 103 }, { "clip_ratio/high_max": 0.003229281399399042, "clip_ratio/high_mean": 0.0013347600124689052, "clip_ratio/low_mean": 0.0011825241326732794, "clip_ratio/low_min": 0.00016134392353706062, "clip_ratio/region_mean": 0.0025172841851599514, "epoch": 0.2426363371245261, "grad_norm": 0.12217263132333755, "learning_rate": 1e-06, "loss": 0.0004, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 610.200927734375, "completions/mean_terminated_length": 546.8226928710938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.24496937882764655, "grad_norm": 0.1429988294839859, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 15692120.0, "reward": 0.606026828289032, "reward_std": 0.20786207914352417, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 105 }, { "clip_ratio/high_max": 0.0027260779970674776, "clip_ratio/high_mean": 0.0010892810350924265, "clip_ratio/low_mean": 0.0006691913213217049, "clip_ratio/low_min": 2.6415588763484266e-05, "clip_ratio/region_mean": 0.0017584723536856472, "epoch": 0.24730242053076698, "grad_norm": 0.13557489216327667, "learning_rate": 1e-06, "loss": -0.0091, "step": 106 }, { "clip_ratio/high_max": 0.003239470657717902, "clip_ratio/high_mean": 0.0012547226724564098, "clip_ratio/low_mean": 0.0008387892130485852, "clip_ratio/low_min": 2.230407244496746e-05, "clip_ratio/region_mean": 0.0020935118591296487, "epoch": 0.24963546223388744, "grad_norm": 0.12885208427906036, "learning_rate": 1e-06, "loss": -0.0093, "step": 107 }, { "clip_ratio/high_max": 0.003756755613721907, "clip_ratio/high_mean": 0.0014417400561796967, "clip_ratio/low_mean": 0.0010771977049444104, "clip_ratio/low_min": 5.801628685730975e-05, "clip_ratio/region_mean": 0.0025189377774950117, "epoch": 0.25196850393700787, "grad_norm": 0.11893340945243835, "learning_rate": 1e-06, "loss": -0.0096, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 616.1674194335938, "completions/mean_terminated_length": 560.9320068359375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.2543015456401283, "grad_norm": 0.1383434236049652, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 16276854.0, "reward": 0.5680803656578064, "reward_std": 0.19114552438259125, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 109 }, { "clip_ratio/high_max": 0.0023861555018811487, "clip_ratio/high_mean": 0.0008340432177647017, "clip_ratio/low_mean": 0.0006876127763462136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015216559804684948, "epoch": 0.2566345873432488, "grad_norm": 0.1269102692604065, "learning_rate": 1e-06, "loss": 0.0051, "step": 110 }, { "clip_ratio/high_max": 0.002608327748021111, "clip_ratio/high_mean": 0.0009782186771190027, "clip_ratio/low_mean": 0.0008567730164941167, "clip_ratio/low_min": 2.7736112315324135e-05, "clip_ratio/region_mean": 0.001834991700889077, "epoch": 0.2589676290463692, "grad_norm": 0.12042663991451263, "learning_rate": 1e-06, "loss": 0.0049, "step": 111 }, { "clip_ratio/high_max": 0.0034152946100221016, "clip_ratio/high_mean": 0.001158751347247744, "clip_ratio/low_mean": 0.001146242648246698, "clip_ratio/low_min": 6.449000829888973e-05, "clip_ratio/region_mean": 0.0023049940573400818, "epoch": 0.26130067074948965, "grad_norm": 0.11390484869480133, "learning_rate": 1e-06, "loss": 0.0046, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 660.779052734375, "completions/mean_terminated_length": 566.2316284179688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.2636337124526101, "grad_norm": 0.138559490442276, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 16859592.0, "reward": 0.5412946939468384, "reward_std": 0.2048134207725525, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 113 }, { "clip_ratio/high_max": 0.0021220286726020277, "clip_ratio/high_mean": 0.0008864953342708759, "clip_ratio/low_mean": 0.000801610280177556, "clip_ratio/low_min": 1.3721185496251564e-05, "clip_ratio/region_mean": 0.001688105687208008, "epoch": 0.2659667541557305, "grad_norm": 0.13961747288703918, "learning_rate": 1e-06, "loss": -0.01, "step": 114 }, { "clip_ratio/high_max": 0.0025299960470874794, "clip_ratio/high_mean": 0.0010865610820474103, "clip_ratio/low_mean": 0.0009482974965067115, "clip_ratio/low_min": 4.569198699755361e-05, "clip_ratio/region_mean": 0.002034858596744016, "epoch": 0.268299795858851, "grad_norm": 0.121913842856884, "learning_rate": 1e-06, "loss": -0.0102, "step": 115 }, { "clip_ratio/high_max": 0.00305376138567226, "clip_ratio/high_mean": 0.0012620480774785392, "clip_ratio/low_mean": 0.0011841662380902562, "clip_ratio/low_min": 4.5887939450039994e-05, "clip_ratio/region_mean": 0.0024462142901029438, "epoch": 0.27063283756197143, "grad_norm": 0.11679504811763763, "learning_rate": 1e-06, "loss": -0.0105, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 582.9375, "completions/mean_terminated_length": 519.0635986328125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.27296587926509186, "grad_norm": 0.14729362726211548, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 17411696.0, "reward": 0.5814732313156128, "reward_std": 0.19956262409687042, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 117 }, { "clip_ratio/high_max": 0.0022353681015374605, "clip_ratio/high_mean": 0.000848157171276398, "clip_ratio/low_mean": 0.0007919722811493557, "clip_ratio/low_min": 6.607407885894645e-05, "clip_ratio/region_mean": 0.0016401294778916053, "epoch": 0.2752989209682123, "grad_norm": 0.13534927368164062, "learning_rate": 1e-06, "loss": 0.0047, "step": 118 }, { "clip_ratio/high_max": 0.0024886363753466867, "clip_ratio/high_mean": 0.0009582586317264941, "clip_ratio/low_mean": 0.0009548193229420576, "clip_ratio/low_min": 8.961806815932505e-05, "clip_ratio/region_mean": 0.0019130779619445093, "epoch": 0.2776319626713328, "grad_norm": 0.12875303626060486, "learning_rate": 1e-06, "loss": 0.0044, "step": 119 }, { "clip_ratio/high_max": 0.0031516384551650845, "clip_ratio/high_mean": 0.0012720920894935261, "clip_ratio/low_mean": 0.001260018634638982, "clip_ratio/low_min": 0.0001334127309746691, "clip_ratio/region_mean": 0.002532110724132508, "epoch": 0.2799650043744532, "grad_norm": 0.12104269117116928, "learning_rate": 1e-06, "loss": 0.0041, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 713.3694458007812, "completions/mean_terminated_length": 596.188232421875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.28229804607757364, "grad_norm": 0.14079630374908447, "learning_rate": 1e-06, "loss": -0.0289, "num_tokens": 18012875.0, "reward": 0.5636160969734192, "reward_std": 0.2250785529613495, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 121 }, { "clip_ratio/high_max": 0.002123760321410373, "clip_ratio/high_mean": 0.0009876530384644866, "clip_ratio/low_mean": 0.0007009498767729383, "clip_ratio/low_min": 4.251286372891627e-05, "clip_ratio/region_mean": 0.0016886029261513613, "epoch": 0.28463108778069407, "grad_norm": 0.12778230011463165, "learning_rate": 1e-06, "loss": -0.0289, "step": 122 }, { "clip_ratio/high_max": 0.002361714985454455, "clip_ratio/high_mean": 0.0011121618990728166, "clip_ratio/low_mean": 0.0008798028375167632, "clip_ratio/low_min": 7.702303810219746e-05, "clip_ratio/region_mean": 0.0019919647311326116, "epoch": 0.2869641294838145, "grad_norm": 0.1207275241613388, "learning_rate": 1e-06, "loss": -0.0291, "step": 123 }, { "clip_ratio/high_max": 0.002912230876972899, "clip_ratio/high_mean": 0.0013530202086258214, "clip_ratio/low_mean": 0.0010904143673542421, "clip_ratio/low_min": 6.472261793533107e-05, "clip_ratio/region_mean": 0.0024434345978079364, "epoch": 0.289297171186935, "grad_norm": 0.11319538205862045, "learning_rate": 1e-06, "loss": -0.0294, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 598.359375, "completions/mean_terminated_length": 546.865234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.2916302128900554, "grad_norm": 0.14457763731479645, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 18586597.0, "reward": 0.5881696939468384, "reward_std": 0.1958770900964737, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 125 }, { "clip_ratio/high_max": 0.001991376884689089, "clip_ratio/high_mean": 0.0008952707667049253, "clip_ratio/low_mean": 0.0007696016209592926, "clip_ratio/low_min": 4.010452175862156e-05, "clip_ratio/region_mean": 0.0016648723467369564, "epoch": 0.29396325459317585, "grad_norm": 0.12987324595451355, "learning_rate": 1e-06, "loss": 0.0077, "step": 126 }, { "clip_ratio/high_max": 0.002469553284754511, "clip_ratio/high_mean": 0.001016039806927438, "clip_ratio/low_mean": 0.0009526717312837718, "clip_ratio/low_min": 7.681566967221443e-05, "clip_ratio/region_mean": 0.001968711534573231, "epoch": 0.2962962962962963, "grad_norm": 0.12295429408550262, "learning_rate": 1e-06, "loss": 0.0075, "step": 127 }, { "clip_ratio/high_max": 0.0030500318825943395, "clip_ratio/high_mean": 0.0012541321193566546, "clip_ratio/low_mean": 0.0012578751811815891, "clip_ratio/low_min": 8.786473335931078e-05, "clip_ratio/region_mean": 0.002512007333280053, "epoch": 0.29862933799941677, "grad_norm": 0.11378207802772522, "learning_rate": 1e-06, "loss": 0.0072, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 551.9777221679688, "completions/mean_terminated_length": 503.8688049316406, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.3009623797025372, "grad_norm": 0.13662169873714447, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 19118553.0, "reward": 0.5524553656578064, "reward_std": 0.17604519426822662, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 129 }, { "clip_ratio/high_max": 0.0022779615756007843, "clip_ratio/high_mean": 0.0008318343370774528, "clip_ratio/low_mean": 0.0006925915386091219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00152442589023849, "epoch": 0.30329542140565763, "grad_norm": 0.1270441710948944, "learning_rate": 1e-06, "loss": -0.0007, "step": 130 }, { "clip_ratio/high_max": 0.0028367843260639347, "clip_ratio/high_mean": 0.00102889733898337, "clip_ratio/low_mean": 0.0007989179484866327, "clip_ratio/low_min": 4.832641388929915e-05, "clip_ratio/region_mean": 0.0018278152856510133, "epoch": 0.30562846310877806, "grad_norm": 0.12071854621171951, "learning_rate": 1e-06, "loss": -0.0009, "step": 131 }, { "clip_ratio/high_max": 0.0029930577147752047, "clip_ratio/high_mean": 0.0011137633518956136, "clip_ratio/low_mean": 0.001050291659339564, "clip_ratio/low_min": 8.356419039046159e-05, "clip_ratio/region_mean": 0.002164054967579432, "epoch": 0.3079615048118985, "grad_norm": 0.11223538219928741, "learning_rate": 1e-06, "loss": -0.0011, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 642.640625, "completions/mean_terminated_length": 571.8428344726562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.310294546515019, "grad_norm": 0.1413862556219101, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 19704023.0, "reward": 0.5703125, "reward_std": 0.21695458889007568, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 133 }, { "clip_ratio/high_max": 0.002118021686328575, "clip_ratio/high_mean": 0.0008867751712386962, "clip_ratio/low_mean": 0.0007788575276208576, "clip_ratio/low_min": 2.9703820473514497e-05, "clip_ratio/region_mean": 0.0016656326988595538, "epoch": 0.3126275882181394, "grad_norm": 0.13218247890472412, "learning_rate": 1e-06, "loss": 0.004, "step": 134 }, { "clip_ratio/high_max": 0.0025291019555879757, "clip_ratio/high_mean": 0.001016958885884378, "clip_ratio/low_mean": 0.0009593481772753876, "clip_ratio/low_min": 6.258060875552474e-05, "clip_ratio/region_mean": 0.0019763070667977445, "epoch": 0.31496062992125984, "grad_norm": 0.12421286851167679, "learning_rate": 1e-06, "loss": 0.0038, "step": 135 }, { "clip_ratio/high_max": 0.003061683637497481, "clip_ratio/high_mean": 0.0012124020940973423, "clip_ratio/low_mean": 0.0013217039740993641, "clip_ratio/low_min": 0.00014375950922840275, "clip_ratio/region_mean": 0.002534106039092876, "epoch": 0.31729367162438027, "grad_norm": 0.1148647591471672, "learning_rate": 1e-06, "loss": 0.0035, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 657.0647583007812, "completions/mean_terminated_length": 570.5010986328125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.31962671332750076, "grad_norm": 0.1420687437057495, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 20286297.0, "reward": 0.6395089626312256, "reward_std": 0.19985796511173248, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 137 }, { "clip_ratio/high_max": 0.0021685889805667102, "clip_ratio/high_mean": 0.0009483293542871252, "clip_ratio/low_mean": 0.0006977156681386987, "clip_ratio/low_min": 6.864401530037867e-05, "clip_ratio/region_mean": 0.0016460450206068344, "epoch": 0.3219597550306212, "grad_norm": 0.13288776576519012, "learning_rate": 1e-06, "loss": -0.0076, "step": 138 }, { "clip_ratio/high_max": 0.002660265556187369, "clip_ratio/high_mean": 0.0011331602690916043, "clip_ratio/low_mean": 0.0009111128420045134, "clip_ratio/low_min": 7.782035845593782e-05, "clip_ratio/region_mean": 0.002044273081992287, "epoch": 0.3242927967337416, "grad_norm": 0.12325869500637054, "learning_rate": 1e-06, "loss": -0.0078, "step": 139 }, { "clip_ratio/high_max": 0.0032126164151122794, "clip_ratio/high_mean": 0.0013978934293845668, "clip_ratio/low_mean": 0.0011614134309638757, "clip_ratio/low_min": 0.00010561648014117964, "clip_ratio/region_mean": 0.0025593068785383366, "epoch": 0.32662583843686205, "grad_norm": 0.11403841525316238, "learning_rate": 1e-06, "loss": -0.0081, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 639.349365234375, "completions/mean_terminated_length": 576.5010986328125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.3289588801399825, "grad_norm": 0.1328297108411789, "learning_rate": 1e-06, "loss": 0.019, "num_tokens": 20886442.0, "reward": 0.566964328289032, "reward_std": 0.18384820222854614, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 141 }, { "clip_ratio/high_max": 0.0020415060862433165, "clip_ratio/high_mean": 0.0007540604619862279, "clip_ratio/low_mean": 0.0006621093152716639, "clip_ratio/low_min": 1.1567647561605554e-05, "clip_ratio/region_mean": 0.0014161697909003124, "epoch": 0.33129192184310297, "grad_norm": 0.12624932825565338, "learning_rate": 1e-06, "loss": 0.019, "step": 142 }, { "clip_ratio/high_max": 0.0023433275418938138, "clip_ratio/high_mean": 0.000899269765795907, "clip_ratio/low_mean": 0.0008481623408442829, "clip_ratio/low_min": 2.1822625058121048e-05, "clip_ratio/region_mean": 0.0017474321066401899, "epoch": 0.3336249635462234, "grad_norm": 0.11816389858722687, "learning_rate": 1e-06, "loss": 0.0188, "step": 143 }, { "clip_ratio/high_max": 0.002858101186575368, "clip_ratio/high_mean": 0.0010737119755503954, "clip_ratio/low_mean": 0.001038404379869462, "clip_ratio/low_min": 1.3037129974691197e-05, "clip_ratio/region_mean": 0.002112116271746345, "epoch": 0.3359580052493438, "grad_norm": 0.11135821044445038, "learning_rate": 1e-06, "loss": 0.0186, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 618.3817138671875, "completions/mean_terminated_length": 538.9840087890625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.33829104695246426, "grad_norm": 0.15332533419132233, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 21435320.0, "reward": 0.5714285969734192, "reward_std": 0.19505152106285095, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 145 }, { "clip_ratio/high_max": 0.0020463345099415164, "clip_ratio/high_mean": 0.0008479078805976314, "clip_ratio/low_mean": 0.0006631461656070314, "clip_ratio/low_min": 1.4778907825530041e-05, "clip_ratio/region_mean": 0.0015110540043679066, "epoch": 0.34062408865558474, "grad_norm": 0.1367734968662262, "learning_rate": 1e-06, "loss": 0.0032, "step": 146 }, { "clip_ratio/high_max": 0.0025536008542985655, "clip_ratio/high_mean": 0.0011314190742268693, "clip_ratio/low_mean": 0.0008894280945241917, "clip_ratio/low_min": 1.127548239310272e-05, "clip_ratio/region_mean": 0.002020847226958722, "epoch": 0.3429571303587052, "grad_norm": 0.1255784034729004, "learning_rate": 1e-06, "loss": 0.0029, "step": 147 }, { "clip_ratio/high_max": 0.0032337115990230814, "clip_ratio/high_mean": 0.0013143957185093313, "clip_ratio/low_mean": 0.001290868120122468, "clip_ratio/low_min": 5.069767757959198e-05, "clip_ratio/region_mean": 0.0026052638568216935, "epoch": 0.3452901720618256, "grad_norm": 0.11244704574346542, "learning_rate": 1e-06, "loss": 0.0026, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2181.0, "completions/mean_length": 670.7589721679688, "completions/mean_terminated_length": 548.004638671875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.34762321376494604, "grad_norm": 0.1416712999343872, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 21998680.0, "reward": 0.5792410969734192, "reward_std": 0.18539589643478394, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 149 }, { "clip_ratio/high_max": 0.002060828916000901, "clip_ratio/high_mean": 0.0008964760581875453, "clip_ratio/low_mean": 0.0007116560336726252, "clip_ratio/low_min": 2.3708240405539982e-05, "clip_ratio/region_mean": 0.001608132111869054, "epoch": 0.34995625546806647, "grad_norm": 0.13007508218288422, "learning_rate": 1e-06, "loss": -0.0057, "step": 150 }, { "clip_ratio/high_max": 0.0024481159562128596, "clip_ratio/high_mean": 0.0010035141422122251, "clip_ratio/low_mean": 0.000854908967994561, "clip_ratio/low_min": 3.4888919799414e-05, "clip_ratio/region_mean": 0.0018584230856504291, "epoch": 0.35228929717118695, "grad_norm": 0.12309257686138153, "learning_rate": 1e-06, "loss": -0.006, "step": 151 }, { "clip_ratio/high_max": 0.0027699472557287663, "clip_ratio/high_mean": 0.001237294425664004, "clip_ratio/low_mean": 0.0011198989104741486, "clip_ratio/low_min": 6.129092344053788e-05, "clip_ratio/region_mean": 0.0023571933270432055, "epoch": 0.3546223388743074, "grad_norm": 0.11313092708587646, "learning_rate": 1e-06, "loss": -0.0062, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 635.640625, "completions/mean_terminated_length": 532.2276000976562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.3569553805774278, "grad_norm": 0.16089573502540588, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 22557582.0, "reward": 0.5658482313156128, "reward_std": 0.19028112292289734, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 153 }, { "clip_ratio/high_max": 0.002484954813553486, "clip_ratio/high_mean": 0.0010083843262691516, "clip_ratio/low_mean": 0.0007808934342392604, "clip_ratio/low_min": 2.7244986995356157e-05, "clip_ratio/region_mean": 0.0017892778269015253, "epoch": 0.35928842228054825, "grad_norm": 0.1429881751537323, "learning_rate": 1e-06, "loss": 0.0126, "step": 154 }, { "clip_ratio/high_max": 0.0029618101398227736, "clip_ratio/high_mean": 0.0011523421126184985, "clip_ratio/low_mean": 0.0010133042687812122, "clip_ratio/low_min": 1.8463810192770325e-05, "clip_ratio/region_mean": 0.0021656463795807213, "epoch": 0.36162146398366873, "grad_norm": 0.1328917145729065, "learning_rate": 1e-06, "loss": 0.0123, "step": 155 }, { "clip_ratio/high_max": 0.003874518457450904, "clip_ratio/high_mean": 0.0015588811147608794, "clip_ratio/low_mean": 0.001363281629892299, "clip_ratio/low_min": 3.692762038554065e-05, "clip_ratio/region_mean": 0.0029221628356026486, "epoch": 0.36395450568678916, "grad_norm": 0.11679844558238983, "learning_rate": 1e-06, "loss": 0.0119, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2478.0, "completions/mean_length": 632.2701416015625, "completions/mean_terminated_length": 532.8518676757812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.3662875473899096, "grad_norm": 0.15063433349132538, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 23124048.0, "reward": 0.5245535969734192, "reward_std": 0.16157494485378265, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 157 }, { "clip_ratio/high_max": 0.0018712463061092421, "clip_ratio/high_mean": 0.0006921066869836068, "clip_ratio/low_mean": 0.0006615769361815182, "clip_ratio/low_min": 4.8527930630370975e-05, "clip_ratio/region_mean": 0.0013536836195271462, "epoch": 0.36862058909303, "grad_norm": 0.13288190960884094, "learning_rate": 1e-06, "loss": 0.0019, "step": 158 }, { "clip_ratio/high_max": 0.0023536112857982516, "clip_ratio/high_mean": 0.0009104724431381328, "clip_ratio/low_mean": 0.0008079300805547973, "clip_ratio/low_min": 8.994123345473781e-05, "clip_ratio/region_mean": 0.001718402505503036, "epoch": 0.37095363079615046, "grad_norm": 0.11906816810369492, "learning_rate": 1e-06, "loss": 0.0017, "step": 159 }, { "clip_ratio/high_max": 0.0028984402961214073, "clip_ratio/high_mean": 0.0011211965211259667, "clip_ratio/low_mean": 0.0011310140216664877, "clip_ratio/low_min": 7.551038470410276e-05, "clip_ratio/region_mean": 0.0022522105355164967, "epoch": 0.37328667249927094, "grad_norm": 0.10718466341495514, "learning_rate": 1e-06, "loss": 0.0014, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 710.7756958007812, "completions/mean_terminated_length": 597.5443725585938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.3756197142023914, "grad_norm": 0.1311246156692505, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 23743903.0, "reward": 0.5379464626312256, "reward_std": 0.20034721493721008, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 161 }, { "clip_ratio/high_max": 0.0018662873990251683, "clip_ratio/high_mean": 0.0007580564106319798, "clip_ratio/low_mean": 0.0006720069904986303, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014300633847597055, "epoch": 0.3779527559055118, "grad_norm": 0.12285172939300537, "learning_rate": 1e-06, "loss": -0.0026, "step": 162 }, { "clip_ratio/high_max": 0.002084443942294456, "clip_ratio/high_mean": 0.000873773335115402, "clip_ratio/low_mean": 0.0008651581629237626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00173893148166826, "epoch": 0.38028579760863224, "grad_norm": 0.11335916817188263, "learning_rate": 1e-06, "loss": -0.0028, "step": 163 }, { "clip_ratio/high_max": 0.00263272384472657, "clip_ratio/high_mean": 0.0011476036870590178, "clip_ratio/low_mean": 0.0011298640456516296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022774677781853825, "epoch": 0.3826188393117527, "grad_norm": 0.10379137098789215, "learning_rate": 1e-06, "loss": -0.003, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 596.880615234375, "completions/mean_terminated_length": 545.3646240234375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.38495188101487315, "grad_norm": 0.12824508547782898, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 24316660.0, "reward": 0.5301339626312256, "reward_std": 0.1507183164358139, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 165 }, { "clip_ratio/high_max": 0.0019218572124373168, "clip_ratio/high_mean": 0.0006738395877619041, "clip_ratio/low_mean": 0.0005990883273625514, "clip_ratio/low_min": 1.594998138898518e-05, "clip_ratio/region_mean": 0.001272927904210519, "epoch": 0.3872849227179936, "grad_norm": 0.1157822459936142, "learning_rate": 1e-06, "loss": 0.0024, "step": 166 }, { "clip_ratio/high_max": 0.0023003846072242595, "clip_ratio/high_mean": 0.0008369924389626249, "clip_ratio/low_mean": 0.0007512151278206147, "clip_ratio/low_min": 5.586977204075083e-05, "clip_ratio/region_mean": 0.0015882075895206071, "epoch": 0.389617964421114, "grad_norm": 0.11070374399423599, "learning_rate": 1e-06, "loss": 0.0022, "step": 167 }, { "clip_ratio/high_max": 0.0027091887313872576, "clip_ratio/high_mean": 0.001003446342110692, "clip_ratio/low_mean": 0.0009466884366702288, "clip_ratio/low_min": 8.125521162583027e-05, "clip_ratio/region_mean": 0.0019501347705954686, "epoch": 0.39195100612423445, "grad_norm": 0.10186446458101273, "learning_rate": 1e-06, "loss": 0.002, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3353.0, "completions/mean_length": 616.4877319335938, "completions/mean_terminated_length": 557.2451782226562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.39428404782735493, "grad_norm": 0.14798404276371002, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 24893729.0, "reward": 0.609375, "reward_std": 0.20989085733890533, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 169 }, { "clip_ratio/high_max": 0.0018904729513451457, "clip_ratio/high_mean": 0.0007839824556867825, "clip_ratio/low_mean": 0.0007382788771792548, "clip_ratio/low_min": 1.660467569308821e-05, "clip_ratio/region_mean": 0.0015222613074001856, "epoch": 0.39661708953047536, "grad_norm": 0.13553324341773987, "learning_rate": 1e-06, "loss": -0.0065, "step": 170 }, { "clip_ratio/high_max": 0.002747044949501287, "clip_ratio/high_mean": 0.0010466027597431093, "clip_ratio/low_mean": 0.000919402576982975, "clip_ratio/low_min": 4.9770029363571666e-05, "clip_ratio/region_mean": 0.0019660053512779996, "epoch": 0.3989501312335958, "grad_norm": 0.12578773498535156, "learning_rate": 1e-06, "loss": -0.0067, "step": 171 }, { "clip_ratio/high_max": 0.00323473742173519, "clip_ratio/high_mean": 0.0012907685159007087, "clip_ratio/low_mean": 0.0012668096933339257, "clip_ratio/low_min": 6.0563043916772585e-05, "clip_ratio/region_mean": 0.0025575781328370795, "epoch": 0.4012831729367162, "grad_norm": 0.11518684774637222, "learning_rate": 1e-06, "loss": -0.0071, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1808.0, "completions/mean_length": 554.7902221679688, "completions/mean_terminated_length": 502.6545715332031, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.4036162146398367, "grad_norm": 0.12800852954387665, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 25423765.0, "reward": 0.6417410969734192, "reward_std": 0.13940230011940002, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 173 }, { "clip_ratio/high_max": 0.0019425980644882657, "clip_ratio/high_mean": 0.0006649385959462961, "clip_ratio/low_mean": 0.0005632325992337428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012281711969990283, "epoch": 0.40594925634295714, "grad_norm": 0.1172415241599083, "learning_rate": 1e-06, "loss": 0.0033, "step": 174 }, { "clip_ratio/high_max": 0.0026202484586974606, "clip_ratio/high_mean": 0.0008713735078345053, "clip_ratio/low_mean": 0.0007413044013446779, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016126778937177733, "epoch": 0.4082822980460776, "grad_norm": 0.10593356192111969, "learning_rate": 1e-06, "loss": 0.0031, "step": 175 }, { "clip_ratio/high_max": 0.0032829120027599856, "clip_ratio/high_mean": 0.0010729132209235104, "clip_ratio/low_mean": 0.001052813387104834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021257266198517755, "epoch": 0.410615339749198, "grad_norm": 0.09476820379495621, "learning_rate": 1e-06, "loss": 0.0029, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 628.833740234375, "completions/mean_terminated_length": 569.8013916015625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.41294838145231844, "grad_norm": 0.1438443958759308, "learning_rate": 1e-06, "loss": 0.0154, "num_tokens": 26019152.0, "reward": 0.6015625, "reward_std": 0.19009174406528473, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 177 }, { "clip_ratio/high_max": 0.0019176373862137552, "clip_ratio/high_mean": 0.0008010229430510662, "clip_ratio/low_mean": 0.0006532900006277487, "clip_ratio/low_min": 4.77802095701918e-05, "clip_ratio/region_mean": 0.0014543129691446666, "epoch": 0.4152814231554389, "grad_norm": 0.12829677760601044, "learning_rate": 1e-06, "loss": 0.0154, "step": 178 }, { "clip_ratio/high_max": 0.002404999773716554, "clip_ratio/high_mean": 0.0009936790356732672, "clip_ratio/low_mean": 0.0007914682173577603, "clip_ratio/low_min": 4.34833018516656e-05, "clip_ratio/region_mean": 0.0017851472657639533, "epoch": 0.41761446485855935, "grad_norm": 0.11850535869598389, "learning_rate": 1e-06, "loss": 0.0152, "step": 179 }, { "clip_ratio/high_max": 0.0030456394961220212, "clip_ratio/high_mean": 0.0012951477692695335, "clip_ratio/low_mean": 0.0010704104097385425, "clip_ratio/low_min": 0.00010052147626993246, "clip_ratio/region_mean": 0.002365558124438394, "epoch": 0.4199475065616798, "grad_norm": 0.10846678912639618, "learning_rate": 1e-06, "loss": 0.0149, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2157.0, "completions/mean_length": 702.2545166015625, "completions/mean_terminated_length": 596.8101196289062, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.4222805482648002, "grad_norm": 0.15255486965179443, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 26626228.0, "reward": 0.578125, "reward_std": 0.22304727137088776, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 181 }, { "clip_ratio/high_max": 0.002199248679971788, "clip_ratio/high_mean": 0.0009739303804963129, "clip_ratio/low_mean": 0.0007363921686192043, "clip_ratio/low_min": 4.494204677030211e-05, "clip_ratio/region_mean": 0.0017103225400205702, "epoch": 0.4246135899679207, "grad_norm": 0.13235877454280853, "learning_rate": 1e-06, "loss": -0.0124, "step": 182 }, { "clip_ratio/high_max": 0.0028784031164832413, "clip_ratio/high_mean": 0.0012378583414829336, "clip_ratio/low_mean": 0.001032456482789712, "clip_ratio/low_min": 0.00015805111070221756, "clip_ratio/region_mean": 0.0022703147915308364, "epoch": 0.42694663167104113, "grad_norm": 0.12457403540611267, "learning_rate": 1e-06, "loss": -0.0127, "step": 183 }, { "clip_ratio/high_max": 0.0032764260104158893, "clip_ratio/high_mean": 0.0014971828823036049, "clip_ratio/low_mean": 0.0013137157548044343, "clip_ratio/low_min": 0.00025359808205394074, "clip_ratio/region_mean": 0.0028108987025916576, "epoch": 0.42927967337416156, "grad_norm": 0.11084217578172684, "learning_rate": 1e-06, "loss": -0.0129, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 674.5301513671875, "completions/mean_terminated_length": 592.4148559570312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.431612715077282, "grad_norm": 0.15568730235099792, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 27229487.0, "reward": 0.5770089626312256, "reward_std": 0.22909040749073029, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 185 }, { "clip_ratio/high_max": 0.00252677879325347, "clip_ratio/high_mean": 0.0010718216180976015, "clip_ratio/low_mean": 0.0007078466478560586, "clip_ratio/low_min": 5.008752577850828e-05, "clip_ratio/region_mean": 0.0017796682441257872, "epoch": 0.4339457567804024, "grad_norm": 0.14412274956703186, "learning_rate": 1e-06, "loss": 0.0056, "step": 186 }, { "clip_ratio/high_max": 0.003174394354573451, "clip_ratio/high_mean": 0.0013791719102300704, "clip_ratio/low_mean": 0.0009512608667137101, "clip_ratio/low_min": 3.950148220610572e-05, "clip_ratio/region_mean": 0.0023304327769437805, "epoch": 0.4362787984835229, "grad_norm": 0.12697069346904755, "learning_rate": 1e-06, "loss": 0.0053, "step": 187 }, { "clip_ratio/high_max": 0.003860727563733235, "clip_ratio/high_mean": 0.0016643093767925166, "clip_ratio/low_mean": 0.0013076584800728597, "clip_ratio/low_min": 9.31276636038092e-05, "clip_ratio/region_mean": 0.002971967842313461, "epoch": 0.43861184018664334, "grad_norm": 0.11659512668848038, "learning_rate": 1e-06, "loss": 0.005, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 648.7545166015625, "completions/mean_terminated_length": 553.8760986328125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.4409448818897638, "grad_norm": 0.14523477852344513, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 27807115.0, "reward": 0.5602678656578064, "reward_std": 0.17862920463085175, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 189 }, { "clip_ratio/high_max": 0.002536843756388407, "clip_ratio/high_mean": 0.0009017960110213608, "clip_ratio/low_mean": 0.000687601628669654, "clip_ratio/low_min": 3.680522604554426e-05, "clip_ratio/region_mean": 0.0015893976706138346, "epoch": 0.4432779235928842, "grad_norm": 0.13064271211624146, "learning_rate": 1e-06, "loss": 0.0007, "step": 190 }, { "clip_ratio/high_max": 0.0025679940008558333, "clip_ratio/high_mean": 0.0009879293429548852, "clip_ratio/low_mean": 0.000839366370200878, "clip_ratio/low_min": 5.396260712586809e-05, "clip_ratio/region_mean": 0.0018272957313456573, "epoch": 0.4456109652960047, "grad_norm": 0.12030471116304398, "learning_rate": 1e-06, "loss": 0.0005, "step": 191 }, { "clip_ratio/high_max": 0.0034528510877862573, "clip_ratio/high_mean": 0.0012927373718412127, "clip_ratio/low_mean": 0.0012951187600265257, "clip_ratio/low_min": 0.000124797576063429, "clip_ratio/region_mean": 0.002587856120953802, "epoch": 0.4479440069991251, "grad_norm": 0.11014598608016968, "learning_rate": 1e-06, "loss": 0.0001, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 613.466552734375, "completions/mean_terminated_length": 562.1947631835938, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.45027704870224555, "grad_norm": 0.16899962723255157, "learning_rate": 1e-06, "loss": 0.0221, "num_tokens": 28386549.0, "reward": 0.5267857313156128, "reward_std": 0.20741882920265198, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 193 }, { "clip_ratio/high_max": 0.0021832726124557666, "clip_ratio/high_mean": 0.000943538887440809, "clip_ratio/low_mean": 0.0007493491175409872, "clip_ratio/low_min": 4.98210902151186e-05, "clip_ratio/region_mean": 0.0016928880068007857, "epoch": 0.452610090405366, "grad_norm": 0.14885267615318298, "learning_rate": 1e-06, "loss": 0.0221, "step": 194 }, { "clip_ratio/high_max": 0.002298388833878562, "clip_ratio/high_mean": 0.001078218931070296, "clip_ratio/low_mean": 0.0009705499433039222, "clip_ratio/low_min": 3.285122511442751e-05, "clip_ratio/region_mean": 0.0020487688525463454, "epoch": 0.4549431321084864, "grad_norm": 0.13287481665611267, "learning_rate": 1e-06, "loss": 0.0218, "step": 195 }, { "clip_ratio/high_max": 0.003459480925812386, "clip_ratio/high_mean": 0.0014615041327488143, "clip_ratio/low_mean": 0.001427257404429838, "clip_ratio/low_min": 8.665129826113116e-05, "clip_ratio/region_mean": 0.0028887615189887583, "epoch": 0.4572761738116069, "grad_norm": 0.11544232070446014, "learning_rate": 1e-06, "loss": 0.0214, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 688.0892944335938, "completions/mean_terminated_length": 537.1561889648438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.45960921551472733, "grad_norm": 0.16457441449165344, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 28949965.0, "reward": 0.5647321939468384, "reward_std": 0.21653950214385986, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 197 }, { "clip_ratio/high_max": 0.0026144549265154637, "clip_ratio/high_mean": 0.001071675491402857, "clip_ratio/low_mean": 0.0007388635312963743, "clip_ratio/low_min": 1.6477722965646535e-05, "clip_ratio/region_mean": 0.0018105390336131677, "epoch": 0.46194225721784776, "grad_norm": 0.14192867279052734, "learning_rate": 1e-06, "loss": -0.0045, "step": 198 }, { "clip_ratio/high_max": 0.0031330378478742205, "clip_ratio/high_mean": 0.0013363244179345202, "clip_ratio/low_mean": 0.0010993410069204401, "clip_ratio/low_min": 8.756616261962336e-05, "clip_ratio/region_mean": 0.002435665468510706, "epoch": 0.4642752989209682, "grad_norm": 0.13264091312885284, "learning_rate": 1e-06, "loss": -0.0048, "step": 199 }, { "clip_ratio/high_max": 0.004000759348855354, "clip_ratio/high_mean": 0.0017286303773289546, "clip_ratio/low_mean": 0.0015543950758001301, "clip_ratio/low_min": 0.00011771431582019432, "clip_ratio/region_mean": 0.0032830254785949364, "epoch": 0.4666083406240887, "grad_norm": 0.11908677220344543, "learning_rate": 1e-06, "loss": -0.0051, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 678.1529541015625, "completions/mean_terminated_length": 580.0516357421875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.4689413823272091, "grad_norm": 0.14772479236125946, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 29536398.0, "reward": 0.613839328289032, "reward_std": 0.2103448063135147, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 201 }, { "clip_ratio/high_max": 0.0022748478877474554, "clip_ratio/high_mean": 0.0009544536587782204, "clip_ratio/low_mean": 0.0008078781611402519, "clip_ratio/low_min": 4.810427526535932e-05, "clip_ratio/region_mean": 0.001762331805366557, "epoch": 0.47127442403032954, "grad_norm": 0.13459992408752441, "learning_rate": 1e-06, "loss": 0.0036, "step": 202 }, { "clip_ratio/high_max": 0.0029411555660772137, "clip_ratio/high_mean": 0.0012017566041322425, "clip_ratio/low_mean": 0.0009390130981046241, "clip_ratio/low_min": 2.4513466996722855e-05, "clip_ratio/region_mean": 0.0021407697204267606, "epoch": 0.47360746573345, "grad_norm": 0.12351057678461075, "learning_rate": 1e-06, "loss": 0.0033, "step": 203 }, { "clip_ratio/high_max": 0.004078905949427281, "clip_ratio/high_mean": 0.001624068543605972, "clip_ratio/low_mean": 0.0014257618931878824, "clip_ratio/low_min": 2.8877647309855092e-05, "clip_ratio/region_mean": 0.0030498303822241724, "epoch": 0.4759405074365704, "grad_norm": 0.11329270899295807, "learning_rate": 1e-06, "loss": 0.0029, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2544.0, "completions/mean_length": 645.5904541015625, "completions/mean_terminated_length": 534.286865234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.4782735491396909, "grad_norm": 0.16950026154518127, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 30079847.0, "reward": 0.6495535969734192, "reward_std": 0.18825094401836395, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 205 }, { "clip_ratio/high_max": 0.002494868869689526, "clip_ratio/high_mean": 0.0010776024173537735, "clip_ratio/low_mean": 0.0005955488450126722, "clip_ratio/low_min": 2.6778063329402357e-05, "clip_ratio/region_mean": 0.0016731512223486789, "epoch": 0.4806065908428113, "grad_norm": 0.13992731273174286, "learning_rate": 1e-06, "loss": -0.003, "step": 206 }, { "clip_ratio/high_max": 0.0030345621598826256, "clip_ratio/high_mean": 0.0013264883800729876, "clip_ratio/low_mean": 0.0008921740100049647, "clip_ratio/low_min": 2.4022096113185398e-05, "clip_ratio/region_mean": 0.0022186624046298675, "epoch": 0.48293963254593175, "grad_norm": 0.12514518201351166, "learning_rate": 1e-06, "loss": -0.0033, "step": 207 }, { "clip_ratio/high_max": 0.003879493298882153, "clip_ratio/high_mean": 0.001654574618441984, "clip_ratio/low_mean": 0.0011969509469054174, "clip_ratio/low_min": 6.202638087415835e-05, "clip_ratio/region_mean": 0.002851525481673889, "epoch": 0.4852726742490522, "grad_norm": 0.11105724424123764, "learning_rate": 1e-06, "loss": -0.0036, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 618.458740234375, "completions/mean_terminated_length": 547.1651611328125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.48760571595217267, "grad_norm": 0.1467469334602356, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 30646010.0, "reward": 0.566964328289032, "reward_std": 0.17603449523448944, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 209 }, { "clip_ratio/high_max": 0.0019824349583359435, "clip_ratio/high_mean": 0.0007825659631635062, "clip_ratio/low_mean": 0.0006163101461424958, "clip_ratio/low_min": 2.2776968762627803e-05, "clip_ratio/region_mean": 0.0013988761129439808, "epoch": 0.4899387576552931, "grad_norm": 0.13121576607227325, "learning_rate": 1e-06, "loss": 0.0117, "step": 210 }, { "clip_ratio/high_max": 0.002819302742864238, "clip_ratio/high_mean": 0.0009831107927311677, "clip_ratio/low_mean": 0.0009618357726139948, "clip_ratio/low_min": 6.817784742452204e-05, "clip_ratio/region_mean": 0.0019449465908110142, "epoch": 0.49227179935841353, "grad_norm": 0.11605199426412582, "learning_rate": 1e-06, "loss": 0.0114, "step": 211 }, { "clip_ratio/high_max": 0.0034392545494483784, "clip_ratio/high_mean": 0.0012148520327173173, "clip_ratio/low_mean": 0.0012521228272817098, "clip_ratio/low_min": 3.9499698686995544e-05, "clip_ratio/region_mean": 0.0024669748381711543, "epoch": 0.49460484106153396, "grad_norm": 0.10615548491477966, "learning_rate": 1e-06, "loss": 0.0111, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 552.8460083007812, "completions/mean_terminated_length": 484.32080078125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.4969378827646544, "grad_norm": 0.14312267303466797, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 31161568.0, "reward": 0.6183035969734192, "reward_std": 0.16085998713970184, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 213 }, { "clip_ratio/high_max": 0.001930233665916603, "clip_ratio/high_mean": 0.0007001064768701326, "clip_ratio/low_mean": 0.0005967966108073597, "clip_ratio/low_min": 1.752664138621185e-05, "clip_ratio/region_mean": 0.0012969030649401248, "epoch": 0.4992709244677749, "grad_norm": 0.12775717675685883, "learning_rate": 1e-06, "loss": 0.0051, "step": 214 }, { "clip_ratio/high_max": 0.002679873548913747, "clip_ratio/high_mean": 0.0009508538278168999, "clip_ratio/low_mean": 0.0008621784945717081, "clip_ratio/low_min": 6.141947051219177e-05, "clip_ratio/region_mean": 0.0018130323151126504, "epoch": 0.5016039661708953, "grad_norm": 0.11602197587490082, "learning_rate": 1e-06, "loss": 0.0049, "step": 215 }, { "clip_ratio/high_max": 0.0031761801001266576, "clip_ratio/high_mean": 0.0012458944001991767, "clip_ratio/low_mean": 0.0012328168540989282, "clip_ratio/low_min": 7.42700467526447e-05, "clip_ratio/region_mean": 0.0024787111760815606, "epoch": 0.5039370078740157, "grad_norm": 0.10054956376552582, "learning_rate": 1e-06, "loss": 0.0046, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 2.2940832877793582e-06, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 2.2940832877793582e-06, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 580.234375, "completions/mean_terminated_length": 508.1571960449219, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.5062700495771362, "grad_norm": 0.16067731380462646, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 31702122.0, "reward": 0.6283482313156128, "reward_std": 0.16698868572711945, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 217 }, { "clip_ratio/high_max": 0.0023931159084895626, "clip_ratio/high_mean": 0.0008846223918226315, "clip_ratio/low_mean": 0.0006512777472380549, "clip_ratio/low_min": 1.190249440696789e-05, "clip_ratio/region_mean": 0.001535900115413824, "epoch": 0.5086030912802566, "grad_norm": 0.13949407637119293, "learning_rate": 1e-06, "loss": -0.0102, "step": 218 }, { "clip_ratio/high_max": 0.0031154281641647685, "clip_ratio/high_mean": 0.0010555867302173283, "clip_ratio/low_mean": 0.0007881471728978795, "clip_ratio/low_min": 2.795695127133513e-05, "clip_ratio/region_mean": 0.0018437338949297555, "epoch": 0.510936132983377, "grad_norm": 0.12435499578714371, "learning_rate": 1e-06, "loss": -0.0105, "step": 219 }, { "clip_ratio/high_max": 0.003920399438356981, "clip_ratio/high_mean": 0.0014925948635209352, "clip_ratio/low_mean": 0.0012264092274563154, "clip_ratio/low_min": 5.052519918535836e-05, "clip_ratio/region_mean": 0.0027190040491404943, "epoch": 0.5132691746864976, "grad_norm": 0.10690676420927048, "learning_rate": 1e-06, "loss": -0.0108, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 620.1886596679688, "completions/mean_terminated_length": 544.885986328125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.515602216389618, "grad_norm": 0.14436660706996918, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 32271395.0, "reward": 0.535714328289032, "reward_std": 0.17908427119255066, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 221 }, { "clip_ratio/high_max": 0.0018413367215543985, "clip_ratio/high_mean": 0.000700404209055705, "clip_ratio/low_mean": 0.0007181626806413988, "clip_ratio/low_min": 4.810638256458333e-05, "clip_ratio/region_mean": 0.0014185668660502415, "epoch": 0.5179352580927384, "grad_norm": 0.13295628130435944, "learning_rate": 1e-06, "loss": -0.0029, "step": 222 }, { "clip_ratio/high_max": 0.002294433375936933, "clip_ratio/high_mean": 0.0009388713660882786, "clip_ratio/low_mean": 0.0010490784115972929, "clip_ratio/low_min": 9.935012712958269e-05, "clip_ratio/region_mean": 0.0019879498358932324, "epoch": 0.5202682997958589, "grad_norm": 0.11821199953556061, "learning_rate": 1e-06, "loss": -0.0032, "step": 223 }, { "clip_ratio/high_max": 0.0028982906878809445, "clip_ratio/high_mean": 0.001236619966221042, "clip_ratio/low_mean": 0.001366804131976096, "clip_ratio/low_min": 0.0001511597329226788, "clip_ratio/region_mean": 0.0026034241091110744, "epoch": 0.5226013414989793, "grad_norm": 0.10926952213048935, "learning_rate": 1e-06, "loss": -0.0035, "step": 224 }, { "clip_ratio/high_max": 2.6997840905096382e-05, "clip_ratio/high_mean": 3.856834609905491e-06, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 3.856834609905491e-06, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2862.0, "completions/mean_length": 584.2824096679688, "completions/mean_terminated_length": 524.4915161132812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.5249343832020997, "grad_norm": 0.15630322694778442, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 32820224.0, "reward": 0.5959821939468384, "reward_std": 0.1813836246728897, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 225 }, { "clip_ratio/high_max": 0.0024520766564819496, "clip_ratio/high_mean": 0.0008743142379898927, "clip_ratio/low_mean": 0.0007158357511798386, "clip_ratio/low_min": 1.546838211652357e-05, "clip_ratio/region_mean": 0.0015901499937172048, "epoch": 0.5272674249052202, "grad_norm": 0.1373729407787323, "learning_rate": 1e-06, "loss": 0.0097, "step": 226 }, { "clip_ratio/high_max": 0.0030381141914403997, "clip_ratio/high_mean": 0.001117759390581341, "clip_ratio/low_mean": 0.0010270871243847068, "clip_ratio/low_min": 6.314043093880173e-05, "clip_ratio/region_mean": 0.002144846504961606, "epoch": 0.5296004666083406, "grad_norm": 0.12113391607999802, "learning_rate": 1e-06, "loss": 0.0094, "step": 227 }, { "clip_ratio/high_max": 0.004030859130580211, "clip_ratio/high_mean": 0.0014508364347420866, "clip_ratio/low_mean": 0.0013528384406527039, "clip_ratio/low_min": 7.481788270524703e-05, "clip_ratio/region_mean": 0.002803674797178246, "epoch": 0.531933508311461, "grad_norm": 0.10859699547290802, "learning_rate": 1e-06, "loss": 0.0091, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 621.794677734375, "completions/mean_terminated_length": 566.6485595703125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.5342665500145816, "grad_norm": 0.14967618882656097, "learning_rate": 1e-06, "loss": 0.0213, "num_tokens": 33409872.0, "reward": 0.59375, "reward_std": 0.18276232481002808, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 229 }, { "clip_ratio/high_max": 0.0021894660385441966, "clip_ratio/high_mean": 0.000889429200469749, "clip_ratio/low_mean": 0.0006891751636430854, "clip_ratio/low_min": 3.910828672815114e-05, "clip_ratio/region_mean": 0.0015786043222760782, "epoch": 0.536599591717702, "grad_norm": 0.13102629780769348, "learning_rate": 1e-06, "loss": 0.0212, "step": 230 }, { "clip_ratio/high_max": 0.002897620084695518, "clip_ratio/high_mean": 0.0011092502063547727, "clip_ratio/low_mean": 0.0009412796280230395, "clip_ratio/low_min": 7.856086449464783e-05, "clip_ratio/region_mean": 0.0020505298307398334, "epoch": 0.5389326334208224, "grad_norm": 0.11863705515861511, "learning_rate": 1e-06, "loss": 0.021, "step": 231 }, { "clip_ratio/high_max": 0.003870700835250318, "clip_ratio/high_mean": 0.0014126963360467926, "clip_ratio/low_mean": 0.0014111116433923598, "clip_ratio/low_min": 8.784459350863472e-05, "clip_ratio/region_mean": 0.0028238079394213855, "epoch": 0.5412656751239429, "grad_norm": 0.10564436763525009, "learning_rate": 1e-06, "loss": 0.0207, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 680.005615234375, "completions/mean_terminated_length": 561.6685791015625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.5435987168270633, "grad_norm": 0.14303980767726898, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 33990421.0, "reward": 0.6171875, "reward_std": 0.14526040852069855, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 233 }, { "clip_ratio/high_max": 0.002403839083854109, "clip_ratio/high_mean": 0.0008075324421952246, "clip_ratio/low_mean": 0.0007027864303381648, "clip_ratio/low_min": 2.497197965567466e-05, "clip_ratio/region_mean": 0.001510318907094188, "epoch": 0.5459317585301837, "grad_norm": 0.12936359643936157, "learning_rate": 1e-06, "loss": -0.0036, "step": 234 }, { "clip_ratio/high_max": 0.002902611973695457, "clip_ratio/high_mean": 0.0009870733229035977, "clip_ratio/low_mean": 0.0008301318594021723, "clip_ratio/low_min": 7.483875015168451e-05, "clip_ratio/region_mean": 0.0018172051277360879, "epoch": 0.5482648002333042, "grad_norm": 0.11433450132608414, "learning_rate": 1e-06, "loss": -0.0038, "step": 235 }, { "clip_ratio/high_max": 0.003531443973770365, "clip_ratio/high_mean": 0.0012489543696574401, "clip_ratio/low_mean": 0.001265525548660662, "clip_ratio/low_min": 8.160452671290841e-05, "clip_ratio/region_mean": 0.0025144799728877842, "epoch": 0.5505978419364246, "grad_norm": 0.10131219029426575, "learning_rate": 1e-06, "loss": -0.0041, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 575.8638916015625, "completions/mean_terminated_length": 532.1107177734375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.552930883639545, "grad_norm": 0.15133512020111084, "learning_rate": 1e-06, "loss": -0.0061, "num_tokens": 34546347.0, "reward": 0.625, "reward_std": 0.17123128473758698, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 237 }, { "clip_ratio/high_max": 0.0025426567590329796, "clip_ratio/high_mean": 0.0009504907357040793, "clip_ratio/low_mean": 0.0006004292363286368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015509199402004015, "epoch": 0.5552639253426656, "grad_norm": 0.14628814160823822, "learning_rate": 1e-06, "loss": -0.0062, "step": 238 }, { "clip_ratio/high_max": 0.0030756705527892336, "clip_ratio/high_mean": 0.0012032609047309961, "clip_ratio/low_mean": 0.0007964599744809675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019997209310531616, "epoch": 0.557596967045786, "grad_norm": 0.11781806498765945, "learning_rate": 1e-06, "loss": -0.0065, "step": 239 }, { "clip_ratio/high_max": 0.003950161975808442, "clip_ratio/high_mean": 0.001592805034306366, "clip_ratio/low_mean": 0.0011004912985299597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026932962791761383, "epoch": 0.5599300087489064, "grad_norm": 0.10449231415987015, "learning_rate": 1e-06, "loss": -0.0067, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 705.5402221679688, "completions/mean_terminated_length": 596.1705322265625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.5622630504520268, "grad_norm": 0.16208094358444214, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 35147847.0, "reward": 0.5245535969734192, "reward_std": 0.20383702218532562, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 241 }, { "clip_ratio/high_max": 0.0022426483265007846, "clip_ratio/high_mean": 0.000893475538759958, "clip_ratio/low_mean": 0.0008003952698345529, "clip_ratio/low_min": 1.0061172361019999e-05, "clip_ratio/region_mean": 0.0016938708213274367, "epoch": 0.5645960921551473, "grad_norm": 0.13810959458351135, "learning_rate": 1e-06, "loss": 0.0019, "step": 242 }, { "clip_ratio/high_max": 0.002830511519277934, "clip_ratio/high_mean": 0.0011773946753237396, "clip_ratio/low_mean": 0.00107185721208225, "clip_ratio/low_min": 4.4980526581639424e-05, "clip_ratio/region_mean": 0.0022492518546641804, "epoch": 0.5669291338582677, "grad_norm": 0.12350639700889587, "learning_rate": 1e-06, "loss": 0.0016, "step": 243 }, { "clip_ratio/high_max": 0.0038162424680194817, "clip_ratio/high_mean": 0.0015512412101088557, "clip_ratio/low_mean": 0.0014474375966528896, "clip_ratio/low_min": 4.4888789489050396e-05, "clip_ratio/region_mean": 0.002998678835865576, "epoch": 0.5692621755613881, "grad_norm": 0.11295558512210846, "learning_rate": 1e-06, "loss": 0.0013, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 705.1217041015625, "completions/mean_terminated_length": 607.7944946289062, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.5715952172645086, "grad_norm": 0.16315478086471558, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 35761204.0, "reward": 0.5680803656578064, "reward_std": 0.21933035552501678, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 245 }, { "clip_ratio/high_max": 0.0025865714414976537, "clip_ratio/high_mean": 0.001123923146224115, "clip_ratio/low_mean": 0.0007891316436143825, "clip_ratio/low_min": 0.00012344556944299256, "clip_ratio/region_mean": 0.0019130547880195081, "epoch": 0.573928258967629, "grad_norm": 0.14528301358222961, "learning_rate": 1e-06, "loss": 0.0064, "step": 246 }, { "clip_ratio/high_max": 0.0032094203561428003, "clip_ratio/high_mean": 0.001431637654604856, "clip_ratio/low_mean": 0.0011640641350822989, "clip_ratio/low_min": 0.00012920122571813408, "clip_ratio/region_mean": 0.0025957018006010912, "epoch": 0.5762613006707495, "grad_norm": 0.12969298660755157, "learning_rate": 1e-06, "loss": 0.0061, "step": 247 }, { "clip_ratio/high_max": 0.0039793891774024814, "clip_ratio/high_mean": 0.0017132762768596876, "clip_ratio/low_mean": 0.0014909405435901135, "clip_ratio/low_min": 0.00012393450197123457, "clip_ratio/region_mean": 0.0032042168750194833, "epoch": 0.57859434237387, "grad_norm": 0.11695972830057144, "learning_rate": 1e-06, "loss": 0.0057, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 656.4866333007812, "completions/mean_terminated_length": 557.7634887695312, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.5809273840769904, "grad_norm": 0.16558833420276642, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 36325672.0, "reward": 0.6082589626312256, "reward_std": 0.20294757187366486, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 249 }, { "clip_ratio/high_max": 0.0021300468943081796, "clip_ratio/high_mean": 0.0008074553225014824, "clip_ratio/low_mean": 0.0008462588775728364, "clip_ratio/low_min": 5.5244285249500535e-05, "clip_ratio/region_mean": 0.0016537142291781493, "epoch": 0.5832604257801108, "grad_norm": 0.1434733271598816, "learning_rate": 1e-06, "loss": -0.0041, "step": 250 }, { "clip_ratio/high_max": 0.003007978230016306, "clip_ratio/high_mean": 0.001103335504012648, "clip_ratio/low_mean": 0.0011921950463147368, "clip_ratio/low_min": 7.098777041392168e-05, "clip_ratio/region_mean": 0.0022955305030336604, "epoch": 0.5855934674832313, "grad_norm": 0.12906546890735626, "learning_rate": 1e-06, "loss": -0.0044, "step": 251 }, { "clip_ratio/high_max": 0.004007178213214502, "clip_ratio/high_mean": 0.001521322785265511, "clip_ratio/low_mean": 0.00167533941930742, "clip_ratio/low_min": 0.00018801092483045068, "clip_ratio/region_mean": 0.0031966622918844223, "epoch": 0.5879265091863517, "grad_norm": 0.11505523324012756, "learning_rate": 1e-06, "loss": -0.0048, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3404.0, "completions/mean_length": 662.208740234375, "completions/mean_terminated_length": 591.8120727539062, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.5902595508894721, "grad_norm": 0.17536237835884094, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 36935051.0, "reward": 0.640625, "reward_std": 0.20182032883167267, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 253 }, { "clip_ratio/high_max": 0.0026318365416955203, "clip_ratio/high_mean": 0.0010733067902037874, "clip_ratio/low_mean": 0.0009032445177581394, "clip_ratio/low_min": 8.841509679768933e-05, "clip_ratio/region_mean": 0.0019765513206948526, "epoch": 0.5925925925925926, "grad_norm": 0.15598393976688385, "learning_rate": 1e-06, "loss": 0.0043, "step": 254 }, { "clip_ratio/high_max": 0.00311816057364922, "clip_ratio/high_mean": 0.0013290793176565785, "clip_ratio/low_mean": 0.0011494629143271595, "clip_ratio/low_min": 0.0001635311473364709, "clip_ratio/region_mean": 0.002478542235621717, "epoch": 0.594925634295713, "grad_norm": 0.14001339673995972, "learning_rate": 1e-06, "loss": 0.004, "step": 255 }, { "clip_ratio/high_max": 0.00431459017272573, "clip_ratio/high_mean": 0.0017310104449279606, "clip_ratio/low_mean": 0.0015786569674673956, "clip_ratio/low_min": 0.0001399989560013637, "clip_ratio/region_mean": 0.0033096673651016317, "epoch": 0.5972586759988335, "grad_norm": 0.12211822718381882, "learning_rate": 1e-06, "loss": 0.0036, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 686.5670166015625, "completions/mean_terminated_length": 600.7459716796875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.599591717701954, "grad_norm": 0.15929347276687622, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 37557783.0, "reward": 0.5412946939468384, "reward_std": 0.17543786764144897, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 257 }, { "clip_ratio/high_max": 0.0017837136583693791, "clip_ratio/high_mean": 0.0007377745587291429, "clip_ratio/low_mean": 0.0007231112522276817, "clip_ratio/low_min": 3.702050798892742e-05, "clip_ratio/region_mean": 0.0014608858255087398, "epoch": 0.6019247594050744, "grad_norm": 0.13033680617809296, "learning_rate": 1e-06, "loss": 0.0039, "step": 258 }, { "clip_ratio/high_max": 0.0026826358007383533, "clip_ratio/high_mean": 0.001005907226499403, "clip_ratio/low_mean": 0.001006415030133212, "clip_ratio/low_min": 7.075888970575761e-05, "clip_ratio/region_mean": 0.0020123221911489964, "epoch": 0.6042578011081948, "grad_norm": 0.11488169431686401, "learning_rate": 1e-06, "loss": 0.0036, "step": 259 }, { "clip_ratio/high_max": 0.0031983513908926398, "clip_ratio/high_mean": 0.0012024563384329667, "clip_ratio/low_mean": 0.001424646754458081, "clip_ratio/low_min": 8.933505796449026e-05, "clip_ratio/region_mean": 0.002627103131089825, "epoch": 0.6065908428113153, "grad_norm": 0.10682583600282669, "learning_rate": 1e-06, "loss": 0.0034, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3707.0, "completions/mean_length": 586.6596069335938, "completions/mean_terminated_length": 522.8533935546875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.6089238845144357, "grad_norm": 0.14499174058437347, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 38116430.0, "reward": 0.6037946939468384, "reward_std": 0.15454471111297607, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 261 }, { "clip_ratio/high_max": 0.0017958663956960663, "clip_ratio/high_mean": 0.0007177433399192523, "clip_ratio/low_mean": 0.0005991962339066959, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013169395833756425, "epoch": 0.6112569262175561, "grad_norm": 0.12276256829500198, "learning_rate": 1e-06, "loss": -0.0064, "step": 262 }, { "clip_ratio/high_max": 0.002295003483595792, "clip_ratio/high_mean": 0.0009093388689507265, "clip_ratio/low_mean": 0.0008009142634364252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017102531437558355, "epoch": 0.6135899679206765, "grad_norm": 0.11274917423725128, "learning_rate": 1e-06, "loss": -0.0067, "step": 263 }, { "clip_ratio/high_max": 0.0030376613067346625, "clip_ratio/high_mean": 0.0012121305335313082, "clip_ratio/low_mean": 0.0012270758952581673, "clip_ratio/low_min": 1.1895698662556242e-05, "clip_ratio/region_mean": 0.0024392064951825887, "epoch": 0.615923009623797, "grad_norm": 0.09926076233386993, "learning_rate": 1e-06, "loss": -0.0069, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3466.0, "completions/mean_length": 686.325927734375, "completions/mean_terminated_length": 596.494873046875, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.6182560513269175, "grad_norm": 0.14685268700122833, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 38732842.0, "reward": 0.590401828289032, "reward_std": 0.18922480940818787, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 265 }, { "clip_ratio/high_max": 0.0022309156411211006, "clip_ratio/high_mean": 0.0009133751191257033, "clip_ratio/low_mean": 0.0007022889130894328, "clip_ratio/low_min": 4.1881876313709654e-05, "clip_ratio/region_mean": 0.0016156640122062527, "epoch": 0.620589093030038, "grad_norm": 0.13202345371246338, "learning_rate": 1e-06, "loss": -0.0066, "step": 266 }, { "clip_ratio/high_max": 0.0026844531093956903, "clip_ratio/high_mean": 0.0010849519239854999, "clip_ratio/low_mean": 0.0009730173806019593, "clip_ratio/low_min": 0.00012092160613974556, "clip_ratio/region_mean": 0.0020579692936735228, "epoch": 0.6229221347331584, "grad_norm": 0.11852691322565079, "learning_rate": 1e-06, "loss": -0.0068, "step": 267 }, { "clip_ratio/high_max": 0.0037240810197545215, "clip_ratio/high_mean": 0.0015229464188450947, "clip_ratio/low_mean": 0.0013412464832072146, "clip_ratio/low_min": 7.539279431512114e-05, "clip_ratio/region_mean": 0.0028641928802244365, "epoch": 0.6252551764362788, "grad_norm": 0.10494676232337952, "learning_rate": 1e-06, "loss": -0.0071, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2752.0, "completions/mean_length": 655.0546875, "completions/mean_terminated_length": 552.2218627929688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.6275882181393992, "grad_norm": 0.16782568395137787, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 39296899.0, "reward": 0.590401828289032, "reward_std": 0.1635616570711136, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 269 }, { "clip_ratio/high_max": 0.0023139839395298623, "clip_ratio/high_mean": 0.0010046616662293673, "clip_ratio/low_mean": 0.0008066119844443165, "clip_ratio/low_min": 6.332535849651322e-05, "clip_ratio/region_mean": 0.0018112735924660228, "epoch": 0.6299212598425197, "grad_norm": 0.145249143242836, "learning_rate": 1e-06, "loss": -0.0036, "step": 270 }, { "clip_ratio/high_max": 0.002705428567423951, "clip_ratio/high_mean": 0.0012561484472826123, "clip_ratio/low_mean": 0.0011260675437370082, "clip_ratio/low_min": 0.00010552876847214065, "clip_ratio/region_mean": 0.0023822160001145676, "epoch": 0.6322543015456401, "grad_norm": 0.12457164376974106, "learning_rate": 1e-06, "loss": -0.0039, "step": 271 }, { "clip_ratio/high_max": 0.003920303446648177, "clip_ratio/high_mean": 0.0017320304505119566, "clip_ratio/low_mean": 0.0015336115429818165, "clip_ratio/low_min": 0.00015255796824931167, "clip_ratio/region_mean": 0.0032656419643899426, "epoch": 0.6345873432487605, "grad_norm": 0.10782023519277573, "learning_rate": 1e-06, "loss": -0.0042, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 676.6864013671875, "completions/mean_terminated_length": 586.6013793945312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.636920384951881, "grad_norm": 0.18795904517173767, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 39899370.0, "reward": 0.5758928656578064, "reward_std": 0.23920251429080963, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 273 }, { "clip_ratio/high_max": 0.002392474933003541, "clip_ratio/high_mean": 0.0010740737889136653, "clip_ratio/low_mean": 0.000922060600714758, "clip_ratio/low_min": 5.739963125961367e-05, "clip_ratio/region_mean": 0.0019961343714385293, "epoch": 0.6392534266550015, "grad_norm": 0.15884293615818024, "learning_rate": 1e-06, "loss": -0.0113, "step": 274 }, { "clip_ratio/high_max": 0.0033024962976924144, "clip_ratio/high_mean": 0.0014491367801383603, "clip_ratio/low_mean": 0.0013788306168862619, "clip_ratio/low_min": 0.0001610294502825127, "clip_ratio/region_mean": 0.0028279674297664315, "epoch": 0.6415864683581219, "grad_norm": 0.13772989809513092, "learning_rate": 1e-06, "loss": -0.0117, "step": 275 }, { "clip_ratio/high_max": 0.004242722163326107, "clip_ratio/high_mean": 0.0018608238315209746, "clip_ratio/low_mean": 0.0018821156627382152, "clip_ratio/low_min": 0.00011654702575469855, "clip_ratio/region_mean": 0.003742939486983232, "epoch": 0.6439195100612424, "grad_norm": 0.12209689617156982, "learning_rate": 1e-06, "loss": -0.012, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 682.8516235351562, "completions/mean_terminated_length": 596.9370727539062, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.6462525517643628, "grad_norm": 0.18276484310626984, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 40511133.0, "reward": 0.5647321939468384, "reward_std": 0.21763674914836884, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 277 }, { "clip_ratio/high_max": 0.0024693090599612333, "clip_ratio/high_mean": 0.0009735047569847666, "clip_ratio/low_mean": 0.000849773936351994, "clip_ratio/low_min": 2.1174477296881378e-05, "clip_ratio/region_mean": 0.0018232787115266547, "epoch": 0.6485855934674832, "grad_norm": 0.14702019095420837, "learning_rate": 1e-06, "loss": 0.0046, "step": 278 }, { "clip_ratio/high_max": 0.0032151059567695484, "clip_ratio/high_mean": 0.001306145290072891, "clip_ratio/low_mean": 0.0012530233143479563, "clip_ratio/low_min": 2.3376920580631122e-05, "clip_ratio/region_mean": 0.0025591686280677095, "epoch": 0.6509186351706037, "grad_norm": 0.12796492874622345, "learning_rate": 1e-06, "loss": 0.0042, "step": 279 }, { "clip_ratio/high_max": 0.0037803994564455934, "clip_ratio/high_mean": 0.0016056657404988073, "clip_ratio/low_mean": 0.001694289778242819, "clip_ratio/low_min": 6.637095884798327e-05, "clip_ratio/region_mean": 0.0032999554387060925, "epoch": 0.6532516768737241, "grad_norm": 0.11351709812879562, "learning_rate": 1e-06, "loss": 0.0039, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 659.763427734375, "completions/mean_terminated_length": 561.1343383789062, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.6555847185768445, "grad_norm": 0.1638711541891098, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 41099465.0, "reward": 0.5725446939468384, "reward_std": 0.17626525461673737, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 281 }, { "clip_ratio/high_max": 0.002225904623628594, "clip_ratio/high_mean": 0.0008210257274186006, "clip_ratio/low_mean": 0.0009191711978928652, "clip_ratio/low_min": 9.336838957096916e-05, "clip_ratio/region_mean": 0.0017401969016646035, "epoch": 0.657917760279965, "grad_norm": 0.1425696760416031, "learning_rate": 1e-06, "loss": 0.0026, "step": 282 }, { "clip_ratio/high_max": 0.003090400030487217, "clip_ratio/high_mean": 0.0010694263291952666, "clip_ratio/low_mean": 0.0013161131428205408, "clip_ratio/low_min": 0.00013725754979532212, "clip_ratio/region_mean": 0.0023855394174461253, "epoch": 0.6602508019830855, "grad_norm": 0.12079791724681854, "learning_rate": 1e-06, "loss": 0.0023, "step": 283 }, { "clip_ratio/high_max": 0.004113614733796567, "clip_ratio/high_mean": 0.0014542791068379302, "clip_ratio/low_mean": 0.0017083044949686155, "clip_ratio/low_min": 0.00024681071863597026, "clip_ratio/region_mean": 0.0031625836272723973, "epoch": 0.6625838436862059, "grad_norm": 0.10929761826992035, "learning_rate": 1e-06, "loss": 0.002, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 677.4765625, "completions/mean_terminated_length": 583.3887329101562, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.6649168853893264, "grad_norm": 0.17890630662441254, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 41705620.0, "reward": 0.5379464626312256, "reward_std": 0.20737282931804657, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 285 }, { "clip_ratio/high_max": 0.0026140181798837148, "clip_ratio/high_mean": 0.000979855209152447, "clip_ratio/low_mean": 0.0008280791980723734, "clip_ratio/low_min": 2.3373224394163117e-05, "clip_ratio/region_mean": 0.0018079344154102728, "epoch": 0.6672499270924468, "grad_norm": 0.15217658877372742, "learning_rate": 1e-06, "loss": 0.0025, "step": 286 }, { "clip_ratio/high_max": 0.003276656054367777, "clip_ratio/high_mean": 0.0013374851769185625, "clip_ratio/low_mean": 0.001169391109215212, "clip_ratio/low_min": 4.6746448788326234e-05, "clip_ratio/region_mean": 0.0025068763352464885, "epoch": 0.6695829687955672, "grad_norm": 0.13031181693077087, "learning_rate": 1e-06, "loss": 0.0021, "step": 287 }, { "clip_ratio/high_max": 0.004641034742235206, "clip_ratio/high_mean": 0.0018147563969250768, "clip_ratio/low_mean": 0.0018042969331872882, "clip_ratio/low_min": 0.00011910085959243588, "clip_ratio/region_mean": 0.003619053211878054, "epoch": 0.6719160104986877, "grad_norm": 0.11582513153553009, "learning_rate": 1e-06, "loss": 0.0017, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 669.583740234375, "completions/mean_terminated_length": 567.18505859375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.6742490522018081, "grad_norm": 0.16318561136722565, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 42295847.0, "reward": 0.5870535969734192, "reward_std": 0.14537875354290009, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 289 }, { "clip_ratio/high_max": 0.002245078496343922, "clip_ratio/high_mean": 0.0007360515937762102, "clip_ratio/low_mean": 0.0007909748765086988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015270264921127819, "epoch": 0.6765820939049285, "grad_norm": 0.13285036385059357, "learning_rate": 1e-06, "loss": 0.0018, "step": 290 }, { "clip_ratio/high_max": 0.0030153739935485646, "clip_ratio/high_mean": 0.0009452611370761588, "clip_ratio/low_mean": 0.0009909664240694838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001936227607075125, "epoch": 0.678915135608049, "grad_norm": 0.11745085567235947, "learning_rate": 1e-06, "loss": 0.0015, "step": 291 }, { "clip_ratio/high_max": 0.003735733997018542, "clip_ratio/high_mean": 0.0012512569901446113, "clip_ratio/low_mean": 0.0013136456582287792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025649026792962104, "epoch": 0.6812481773111695, "grad_norm": 0.10572268068790436, "learning_rate": 1e-06, "loss": 0.0012, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2222.0, "completions/mean_length": 737.8717041015625, "completions/mean_terminated_length": 597.298828125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.6835812190142899, "grad_norm": 0.17150069773197174, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 42899228.0, "reward": 0.559151828289032, "reward_std": 0.2047802060842514, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 293 }, { "clip_ratio/high_max": 0.002142723278666381, "clip_ratio/high_mean": 0.0008918125604395755, "clip_ratio/low_mean": 0.0008082051790552214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017000177322188392, "epoch": 0.6859142607174104, "grad_norm": 0.14242017269134521, "learning_rate": 1e-06, "loss": -0.0019, "step": 294 }, { "clip_ratio/high_max": 0.0024951521700131707, "clip_ratio/high_mean": 0.0011268842245044652, "clip_ratio/low_mean": 0.001115310577006312, "clip_ratio/low_min": 5.978273202345008e-05, "clip_ratio/region_mean": 0.0022421948378905654, "epoch": 0.6882473024205308, "grad_norm": 0.12357576936483383, "learning_rate": 1e-06, "loss": -0.0022, "step": 295 }, { "clip_ratio/high_max": 0.00369637204130413, "clip_ratio/high_mean": 0.0016379483713535592, "clip_ratio/low_mean": 0.0018289698928128928, "clip_ratio/low_min": 0.00012015673837595386, "clip_ratio/region_mean": 0.0034669182787183672, "epoch": 0.6905803441236512, "grad_norm": 0.1049383208155632, "learning_rate": 1e-06, "loss": -0.0025, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 663.3170166015625, "completions/mean_terminated_length": 592.9430541992188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.6929133858267716, "grad_norm": 0.15988747775554657, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 43504896.0, "reward": 0.5424107313156128, "reward_std": 0.14815638959407806, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 297 }, { "clip_ratio/high_max": 0.001845342070737388, "clip_ratio/high_mean": 0.0006780456351407338, "clip_ratio/low_mean": 0.000669656144509645, "clip_ratio/low_min": 1.4824478057562374e-05, "clip_ratio/region_mean": 0.0013477017855620943, "epoch": 0.6952464275298921, "grad_norm": 0.13231121003627777, "learning_rate": 1e-06, "loss": 0.0033, "step": 298 }, { "clip_ratio/high_max": 0.0026544974971329793, "clip_ratio/high_mean": 0.0009119731003011111, "clip_ratio/low_mean": 0.0008552301842428278, "clip_ratio/low_min": 2.0647505152737722e-05, "clip_ratio/region_mean": 0.0017672032481641509, "epoch": 0.6975794692330125, "grad_norm": 0.10828538239002228, "learning_rate": 1e-06, "loss": 0.0031, "step": 299 }, { "clip_ratio/high_max": 0.00328280665416969, "clip_ratio/high_mean": 0.0011893674491147976, "clip_ratio/low_mean": 0.0013076265686322586, "clip_ratio/low_min": 4.4473435991676524e-05, "clip_ratio/region_mean": 0.0024969939622678794, "epoch": 0.6999125109361329, "grad_norm": 0.09215329587459564, "learning_rate": 1e-06, "loss": 0.0028, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 701.7623291015625, "completions/mean_terminated_length": 588.2294921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.7022455526392535, "grad_norm": 0.1863614171743393, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 44108131.0, "reward": 0.5758928656578064, "reward_std": 0.2102695256471634, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 301 }, { "clip_ratio/high_max": 0.0023975638250703923, "clip_ratio/high_mean": 0.0010614907660055906, "clip_ratio/low_mean": 0.000768143312598113, "clip_ratio/low_min": 4.379285110189812e-05, "clip_ratio/region_mean": 0.0018296340276720002, "epoch": 0.7045785943423739, "grad_norm": 0.15295547246932983, "learning_rate": 1e-06, "loss": -0.0224, "step": 302 }, { "clip_ratio/high_max": 0.0032328735251212493, "clip_ratio/high_mean": 0.0013486043026205152, "clip_ratio/low_mean": 0.0012347959273029119, "clip_ratio/low_min": 7.753022509859875e-05, "clip_ratio/region_mean": 0.002583400288131088, "epoch": 0.7069116360454943, "grad_norm": 0.12947703897953033, "learning_rate": 1e-06, "loss": -0.0227, "step": 303 }, { "clip_ratio/high_max": 0.004014515157905407, "clip_ratio/high_mean": 0.0018503821702324785, "clip_ratio/low_mean": 0.0017452756146667525, "clip_ratio/low_min": 8.680756764078978e-05, "clip_ratio/region_mean": 0.003595657763071358, "epoch": 0.7092446777486148, "grad_norm": 0.11146974563598633, "learning_rate": 1e-06, "loss": -0.023, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3592.0, "completions/mean_length": 721.1563110351562, "completions/mean_terminated_length": 567.5752563476562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.7115777194517352, "grad_norm": 0.17767667770385742, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 44686167.0, "reward": 0.5803571939468384, "reward_std": 0.18272772431373596, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 305 }, { "clip_ratio/high_max": 0.0023581050409120508, "clip_ratio/high_mean": 0.000843608204377233, "clip_ratio/low_mean": 0.0009864900894172024, "clip_ratio/low_min": 1.4785900020797271e-05, "clip_ratio/region_mean": 0.0018300982810615096, "epoch": 0.7139107611548556, "grad_norm": 0.148182675242424, "learning_rate": 1e-06, "loss": 0.0008, "step": 306 }, { "clip_ratio/high_max": 0.0029904470648034476, "clip_ratio/high_mean": 0.0010745678209787002, "clip_ratio/low_mean": 0.0013283444968692493, "clip_ratio/low_min": 5.6789811424096115e-05, "clip_ratio/region_mean": 0.00240291231602896, "epoch": 0.7162438028579761, "grad_norm": 0.12474964559078217, "learning_rate": 1e-06, "loss": 0.0004, "step": 307 }, { "clip_ratio/high_max": 0.00375042601081077, "clip_ratio/high_mean": 0.0013786717117909575, "clip_ratio/low_mean": 0.0019732602886506356, "clip_ratio/low_min": 8.85245026438497e-05, "clip_ratio/region_mean": 0.0033519319622428156, "epoch": 0.7185768445610965, "grad_norm": 0.10768650472164154, "learning_rate": 1e-06, "loss": 0.0001, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3248.0, "completions/mean_length": 644.7924194335938, "completions/mean_terminated_length": 561.9634399414062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.7209098862642169, "grad_norm": 0.178111270070076, "learning_rate": 1e-06, "loss": 0.0101, "num_tokens": 45263797.0, "reward": 0.6015625, "reward_std": 0.18877087533473969, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 309 }, { "clip_ratio/high_max": 0.0021982281541568227, "clip_ratio/high_mean": 0.0009732017279020511, "clip_ratio/low_mean": 0.000789531402006105, "clip_ratio/low_min": 1.392912872688612e-05, "clip_ratio/region_mean": 0.001762733176292386, "epoch": 0.7232429279673375, "grad_norm": 0.1471228450536728, "learning_rate": 1e-06, "loss": 0.0101, "step": 310 }, { "clip_ratio/high_max": 0.002947748092992697, "clip_ratio/high_mean": 0.001327784288150724, "clip_ratio/low_mean": 0.001164040237199515, "clip_ratio/low_min": 6.645681423833594e-05, "clip_ratio/region_mean": 0.002491824525350239, "epoch": 0.7255759696704579, "grad_norm": 0.1270896941423416, "learning_rate": 1e-06, "loss": 0.0097, "step": 311 }, { "clip_ratio/high_max": 0.003704839909914881, "clip_ratio/high_mean": 0.0017162806980195455, "clip_ratio/low_mean": 0.0016728131449781358, "clip_ratio/low_min": 0.00010700186612666585, "clip_ratio/region_mean": 0.0033890938066178933, "epoch": 0.7279090113735783, "grad_norm": 0.11113104969263077, "learning_rate": 1e-06, "loss": 0.0094, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 643.8392944335938, "completions/mean_terminated_length": 532.479248046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.7302420530766988, "grad_norm": 0.1911001205444336, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 45815077.0, "reward": 0.6328125, "reward_std": 0.1920456439256668, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 313 }, { "clip_ratio/high_max": 0.0027231294807279482, "clip_ratio/high_mean": 0.001205052984005306, "clip_ratio/low_mean": 0.0008517346523149172, "clip_ratio/low_min": 9.678277820057701e-05, "clip_ratio/region_mean": 0.002056787590845488, "epoch": 0.7325750947798192, "grad_norm": 0.15251220762729645, "learning_rate": 1e-06, "loss": -0.0164, "step": 314 }, { "clip_ratio/high_max": 0.0033590676830499433, "clip_ratio/high_mean": 0.0014756960663362406, "clip_ratio/low_mean": 0.0011992673134955112, "clip_ratio/low_min": 0.00010216729788226075, "clip_ratio/region_mean": 0.0026749633689178154, "epoch": 0.7349081364829396, "grad_norm": 0.1341160535812378, "learning_rate": 1e-06, "loss": -0.0166, "step": 315 }, { "clip_ratio/high_max": 0.004419691889779642, "clip_ratio/high_mean": 0.001997190021938877, "clip_ratio/low_mean": 0.0016970058022707235, "clip_ratio/low_min": 0.00013107948052493157, "clip_ratio/region_mean": 0.003694195853313431, "epoch": 0.73724117818606, "grad_norm": 0.11231222003698349, "learning_rate": 1e-06, "loss": -0.017, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 617.3705444335938, "completions/mean_terminated_length": 533.8834228515625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.7395742198891805, "grad_norm": 0.1953791379928589, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 46372273.0, "reward": 0.6127232313156128, "reward_std": 0.21556422114372253, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 317 }, { "clip_ratio/high_max": 0.0021992934634909034, "clip_ratio/high_mean": 0.0008922412926040124, "clip_ratio/low_mean": 0.0009755404571478721, "clip_ratio/low_min": 7.110198930604383e-05, "clip_ratio/region_mean": 0.0018677817643037997, "epoch": 0.7419072615923009, "grad_norm": 0.15953488647937775, "learning_rate": 1e-06, "loss": 0.005, "step": 318 }, { "clip_ratio/high_max": 0.0029712139439652674, "clip_ratio/high_mean": 0.0012085776179446839, "clip_ratio/low_mean": 0.0015635335439583287, "clip_ratio/low_min": 0.00022440197517425986, "clip_ratio/region_mean": 0.0027721110673155636, "epoch": 0.7442403032954215, "grad_norm": 0.1333884745836258, "learning_rate": 1e-06, "loss": 0.0046, "step": 319 }, { "clip_ratio/high_max": 0.003923837342881598, "clip_ratio/high_mean": 0.001666499323619064, "clip_ratio/low_mean": 0.0022561746009159833, "clip_ratio/low_min": 0.0005153750289537129, "clip_ratio/region_mean": 0.00392267391725909, "epoch": 0.7465733449985419, "grad_norm": 0.11533120274543762, "learning_rate": 1e-06, "loss": 0.0043, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 716.6428833007812, "completions/mean_terminated_length": 603.6078491210938, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.7489063867016623, "grad_norm": 0.17041844129562378, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 46991417.0, "reward": 0.582589328289032, "reward_std": 0.17032834887504578, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 321 }, { "clip_ratio/high_max": 0.002330017312488053, "clip_ratio/high_mean": 0.0008913758065318689, "clip_ratio/low_mean": 0.0008535044189557084, "clip_ratio/low_min": 4.7473279664700385e-05, "clip_ratio/region_mean": 0.0017448802245780826, "epoch": 0.7512394284047827, "grad_norm": 0.14126582443714142, "learning_rate": 1e-06, "loss": 0.0042, "step": 322 }, { "clip_ratio/high_max": 0.0031726174347568303, "clip_ratio/high_mean": 0.001141508444561623, "clip_ratio/low_mean": 0.0011585630018089432, "clip_ratio/low_min": 8.842844181344844e-05, "clip_ratio/region_mean": 0.002300071471836418, "epoch": 0.7535724701079032, "grad_norm": 0.11646294593811035, "learning_rate": 1e-06, "loss": 0.0039, "step": 323 }, { "clip_ratio/high_max": 0.003911312709533377, "clip_ratio/high_mean": 0.0014024608954059659, "clip_ratio/low_mean": 0.0016102465579024283, "clip_ratio/low_min": 0.00010280546484864317, "clip_ratio/region_mean": 0.0030127074278425425, "epoch": 0.7559055118110236, "grad_norm": 0.09994442760944366, "learning_rate": 1e-06, "loss": 0.0037, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 700.747802734375, "completions/mean_terminated_length": 574.9976806640625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.758238553514144, "grad_norm": 0.19488313794136047, "learning_rate": 1e-06, "loss": -0.0126, "num_tokens": 47582383.0, "reward": 0.5714285969734192, "reward_std": 0.20764270424842834, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 325 }, { "clip_ratio/high_max": 0.002429723128443584, "clip_ratio/high_mean": 0.0009923476773110451, "clip_ratio/low_mean": 0.000921401777304709, "clip_ratio/low_min": 3.409767850826029e-05, "clip_ratio/region_mean": 0.0019137494382448494, "epoch": 0.7605715952172645, "grad_norm": 0.1541854292154312, "learning_rate": 1e-06, "loss": -0.0127, "step": 326 }, { "clip_ratio/high_max": 0.002841058128979057, "clip_ratio/high_mean": 0.001338978185231099, "clip_ratio/low_mean": 0.0013930889217590448, "clip_ratio/low_min": 7.254754018504173e-05, "clip_ratio/region_mean": 0.0027320670706103556, "epoch": 0.7629046369203849, "grad_norm": 0.13097520172595978, "learning_rate": 1e-06, "loss": -0.013, "step": 327 }, { "clip_ratio/high_max": 0.0044514184264699, "clip_ratio/high_mean": 0.0018388287353445776, "clip_ratio/low_mean": 0.001996175167732872, "clip_ratio/low_min": 0.00013150443010090385, "clip_ratio/region_mean": 0.003835003823041916, "epoch": 0.7652376786235054, "grad_norm": 0.11963142454624176, "learning_rate": 1e-06, "loss": -0.0134, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 666.6674194335938, "completions/mean_terminated_length": 576.3184814453125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.7675707203266259, "grad_norm": 0.17240570485591888, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 48170725.0, "reward": 0.6729910969734192, "reward_std": 0.16412296891212463, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 329 }, { "clip_ratio/high_max": 0.00220696660107933, "clip_ratio/high_mean": 0.0008583531944168499, "clip_ratio/low_mean": 0.0006830874199295067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001541440640721703, "epoch": 0.7699037620297463, "grad_norm": 0.1470627635717392, "learning_rate": 1e-06, "loss": -0.0072, "step": 330 }, { "clip_ratio/high_max": 0.0033285979880020022, "clip_ratio/high_mean": 0.0011669493505905848, "clip_ratio/low_mean": 0.0010201777076872531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021871270146220922, "epoch": 0.7722368037328667, "grad_norm": 0.11684126406908035, "learning_rate": 1e-06, "loss": -0.0075, "step": 331 }, { "clip_ratio/high_max": 0.00420057843439281, "clip_ratio/high_mean": 0.0014383682755578775, "clip_ratio/low_mean": 0.0015544904126727488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029928586882306263, "epoch": 0.7745698454359872, "grad_norm": 0.10131551325321198, "learning_rate": 1e-06, "loss": -0.0077, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2356.0, "completions/mean_length": 745.8538208007812, "completions/mean_terminated_length": 589.3048706054688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.7769028871391076, "grad_norm": 0.20078398287296295, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 48770786.0, "reward": 0.5323660969734192, "reward_std": 0.18475650250911713, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 333 }, { "clip_ratio/high_max": 0.0024247112487501, "clip_ratio/high_mean": 0.0010145076012122445, "clip_ratio/low_mean": 0.0007677957673877245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017823033485910855, "epoch": 0.779235928842228, "grad_norm": 0.15078534185886383, "learning_rate": 1e-06, "loss": -0.0166, "step": 334 }, { "clip_ratio/high_max": 0.0031352667792816646, "clip_ratio/high_mean": 0.0013583726213255432, "clip_ratio/low_mean": 0.001150686224718811, "clip_ratio/low_min": 8.553207044315059e-05, "clip_ratio/region_mean": 0.0025090588605962694, "epoch": 0.7815689705453485, "grad_norm": 0.12594419717788696, "learning_rate": 1e-06, "loss": -0.017, "step": 335 }, { "clip_ratio/high_max": 0.004018896681373008, "clip_ratio/high_mean": 0.0017607162262720522, "clip_ratio/low_mean": 0.001701744768070057, "clip_ratio/low_min": 0.000137335155159235, "clip_ratio/region_mean": 0.0034624609834281728, "epoch": 0.7839020122484689, "grad_norm": 0.1070583388209343, "learning_rate": 1e-06, "loss": -0.0173, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2256.0, "completions/mean_length": 638.671875, "completions/mean_terminated_length": 555.6959838867188, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.7862350539515894, "grad_norm": 0.17797277867794037, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 49346748.0, "reward": 0.606026828289032, "reward_std": 0.1567264348268509, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 337 }, { "clip_ratio/high_max": 0.0018584726785775274, "clip_ratio/high_mean": 0.0007744862705294508, "clip_ratio/low_mean": 0.0006019287202434498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013764149771304801, "epoch": 0.7885680956547099, "grad_norm": 0.147982656955719, "learning_rate": 1e-06, "loss": -0.0048, "step": 338 }, { "clip_ratio/high_max": 0.0028931079286849126, "clip_ratio/high_mean": 0.00114167316678504, "clip_ratio/low_mean": 0.0009640512635087362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002105724430293776, "epoch": 0.7909011373578303, "grad_norm": 0.12493429332971573, "learning_rate": 1e-06, "loss": -0.0051, "step": 339 }, { "clip_ratio/high_max": 0.003678382869111374, "clip_ratio/high_mean": 0.0014876365530653857, "clip_ratio/low_mean": 0.0014402218748728046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029278584697749466, "epoch": 0.7932341790609507, "grad_norm": 0.10835491865873337, "learning_rate": 1e-06, "loss": -0.0053, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3694.0, "completions/mean_length": 643.9855346679688, "completions/mean_terminated_length": 577.2229614257812, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.7955672207640712, "grad_norm": 0.17144328355789185, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 49940991.0, "reward": 0.6328125, "reward_std": 0.15890929102897644, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 341 }, { "clip_ratio/high_max": 0.0019693505491886754, "clip_ratio/high_mean": 0.0008956879919423955, "clip_ratio/low_mean": 0.0006415165935322875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015372046254924498, "epoch": 0.7979002624671916, "grad_norm": 0.1370604932308197, "learning_rate": 1e-06, "loss": 0.0055, "step": 342 }, { "clip_ratio/high_max": 0.002291073094966123, "clip_ratio/high_mean": 0.0010944333516818006, "clip_ratio/low_mean": 0.0009486940361966845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020431274024304003, "epoch": 0.800233304170312, "grad_norm": 0.11058004200458527, "learning_rate": 1e-06, "loss": 0.0052, "step": 343 }, { "clip_ratio/high_max": 0.0032089280139189214, "clip_ratio/high_mean": 0.0014895219574100338, "clip_ratio/low_mean": 0.0012954038502357434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027849258185597137, "epoch": 0.8025663458734325, "grad_norm": 0.09459247440099716, "learning_rate": 1e-06, "loss": 0.005, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 620.9006958007812, "completions/mean_terminated_length": 525.2557373046875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.8048993875765529, "grad_norm": 0.2086123675107956, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 50479678.0, "reward": 0.6361607313156128, "reward_std": 0.1781841367483139, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 345 }, { "clip_ratio/high_max": 0.002959531011583749, "clip_ratio/high_mean": 0.0011698146336129867, "clip_ratio/low_mean": 0.0007359661840382614, "clip_ratio/low_min": 1.806880572985392e-05, "clip_ratio/region_mean": 0.001905780787637923, "epoch": 0.8072324292796734, "grad_norm": 0.1588672697544098, "learning_rate": 1e-06, "loss": -0.0075, "step": 346 }, { "clip_ratio/high_max": 0.0037611953739542514, "clip_ratio/high_mean": 0.0015444871169165708, "clip_ratio/low_mean": 0.0012248911498318193, "clip_ratio/low_min": 3.570068656699732e-05, "clip_ratio/region_mean": 0.002769378275843337, "epoch": 0.8095654709827939, "grad_norm": 0.12893742322921753, "learning_rate": 1e-06, "loss": -0.0079, "step": 347 }, { "clip_ratio/high_max": 0.005271454210742377, "clip_ratio/high_mean": 0.0021154596106498502, "clip_ratio/low_mean": 0.0018154782228521071, "clip_ratio/low_min": 7.215423829620704e-05, "clip_ratio/region_mean": 0.0039309377752942964, "epoch": 0.8118985126859143, "grad_norm": 0.11167939007282257, "learning_rate": 1e-06, "loss": -0.0082, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 628.0346069335938, "completions/mean_terminated_length": 540.740234375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.8142315543890347, "grad_norm": 0.21729442477226257, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 51032357.0, "reward": 0.6283482313156128, "reward_std": 0.1979040503501892, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 349 }, { "clip_ratio/high_max": 0.0029134640062693506, "clip_ratio/high_mean": 0.0011066034112445777, "clip_ratio/low_mean": 0.0008679829534230521, "clip_ratio/low_min": 9.195012262352975e-05, "clip_ratio/region_mean": 0.001974586382857524, "epoch": 0.8165645960921551, "grad_norm": 0.18893226981163025, "learning_rate": 1e-06, "loss": 0.005, "step": 350 }, { "clip_ratio/high_max": 0.0040770038176560774, "clip_ratio/high_mean": 0.00147658016794594, "clip_ratio/low_mean": 0.0013517987972591072, "clip_ratio/low_min": 5.6533813221903984e-05, "clip_ratio/region_mean": 0.0028283789433771744, "epoch": 0.8188976377952756, "grad_norm": 0.13763734698295593, "learning_rate": 1e-06, "loss": 0.0046, "step": 351 }, { "clip_ratio/high_max": 0.005226439934631344, "clip_ratio/high_mean": 0.002044569802819751, "clip_ratio/low_mean": 0.002028863023042504, "clip_ratio/low_min": 0.00025756384729902493, "clip_ratio/region_mean": 0.004073432894074358, "epoch": 0.821230679498396, "grad_norm": 0.11648156493902206, "learning_rate": 1e-06, "loss": 0.0043, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 634.3861694335938, "completions/mean_terminated_length": 567.43798828125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.8235637212015164, "grad_norm": 0.1741400510072708, "learning_rate": 1e-06, "loss": -0.0061, "num_tokens": 51632983.0, "reward": 0.5703125, "reward_std": 0.16671767830848694, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 353 }, { "clip_ratio/high_max": 0.0024835940021148417, "clip_ratio/high_mean": 0.0008892758705769666, "clip_ratio/low_mean": 0.0006648132002737839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015540890854026657, "epoch": 0.8258967629046369, "grad_norm": 0.14126086235046387, "learning_rate": 1e-06, "loss": -0.0061, "step": 354 }, { "clip_ratio/high_max": 0.003337303045555018, "clip_ratio/high_mean": 0.0012373716926958878, "clip_ratio/low_mean": 0.00104959225973289, "clip_ratio/low_min": 2.5725457817316055e-05, "clip_ratio/region_mean": 0.0022869639651617035, "epoch": 0.8282298046077574, "grad_norm": 0.11913535743951797, "learning_rate": 1e-06, "loss": -0.0064, "step": 355 }, { "clip_ratio/high_max": 0.0039918447873787954, "clip_ratio/high_mean": 0.0015468750898435246, "clip_ratio/low_mean": 0.0015335737989516929, "clip_ratio/low_min": 4.513337489697733e-05, "clip_ratio/region_mean": 0.003080448914261069, "epoch": 0.8305628463108778, "grad_norm": 0.10361921042203903, "learning_rate": 1e-06, "loss": -0.0067, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 670.9420166015625, "completions/mean_terminated_length": 564.5247192382812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.8328958880139983, "grad_norm": 0.19722683727741241, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 52211107.0, "reward": 0.5602678656578064, "reward_std": 0.16796395182609558, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 357 }, { "clip_ratio/high_max": 0.0022055244335206226, "clip_ratio/high_mean": 0.0007792596661602147, "clip_ratio/low_mean": 0.0008433940183749655, "clip_ratio/low_min": 8.536597670172341e-05, "clip_ratio/region_mean": 0.001622653660888318, "epoch": 0.8352289297171187, "grad_norm": 0.15272952616214752, "learning_rate": 1e-06, "loss": 0.0018, "step": 358 }, { "clip_ratio/high_max": 0.0030782657631789334, "clip_ratio/high_mean": 0.001090582907636417, "clip_ratio/low_mean": 0.001217046010424383, "clip_ratio/low_min": 7.629889478266705e-05, "clip_ratio/region_mean": 0.0023076289071468636, "epoch": 0.8375619714202391, "grad_norm": 0.12272930890321732, "learning_rate": 1e-06, "loss": 0.0015, "step": 359 }, { "clip_ratio/high_max": 0.00407740636728704, "clip_ratio/high_mean": 0.0014526810737152118, "clip_ratio/low_mean": 0.001784455540473573, "clip_ratio/low_min": 0.00019804762814601418, "clip_ratio/region_mean": 0.0032371365959988907, "epoch": 0.8398950131233596, "grad_norm": 0.10683020204305649, "learning_rate": 1e-06, "loss": 0.0012, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3105.0, "completions/mean_length": 664.1640625, "completions/mean_terminated_length": 557.5362548828125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.84222805482648, "grad_norm": 0.17832210659980774, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 52783566.0, "reward": 0.6238839626312256, "reward_std": 0.16243229806423187, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 361 }, { "clip_ratio/high_max": 0.0023297566622204613, "clip_ratio/high_mean": 0.0008685299253556877, "clip_ratio/low_mean": 0.0007428834505844861, "clip_ratio/low_min": 1.6905598386074416e-05, "clip_ratio/region_mean": 0.0016114133286464494, "epoch": 0.8445610965296004, "grad_norm": 0.14468637108802795, "learning_rate": 1e-06, "loss": -0.0145, "step": 362 }, { "clip_ratio/high_max": 0.003014496629475616, "clip_ratio/high_mean": 0.0012456201948225498, "clip_ratio/low_mean": 0.0011876857242896222, "clip_ratio/low_min": 2.2587639250559732e-05, "clip_ratio/region_mean": 0.0024333059627679177, "epoch": 0.8468941382327209, "grad_norm": 0.11585845798254013, "learning_rate": 1e-06, "loss": -0.0148, "step": 363 }, { "clip_ratio/high_max": 0.00417908608505968, "clip_ratio/high_mean": 0.0016650610996293835, "clip_ratio/low_mean": 0.0016226969819399528, "clip_ratio/low_min": 3.381119677214883e-05, "clip_ratio/region_mean": 0.0032877580961212516, "epoch": 0.8492271799358414, "grad_norm": 0.10224167257547379, "learning_rate": 1e-06, "loss": -0.015, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 650.505615234375, "completions/mean_terminated_length": 579.8690185546875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.8515602216389618, "grad_norm": 0.19092541933059692, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 53379771.0, "reward": 0.625, "reward_std": 0.1758526861667633, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 365 }, { "clip_ratio/high_max": 0.002529852521547582, "clip_ratio/high_mean": 0.0009399781574757071, "clip_ratio/low_mean": 0.0007532849904237082, "clip_ratio/low_min": 4.43547678514733e-05, "clip_ratio/region_mean": 0.0016932631697272882, "epoch": 0.8538932633420823, "grad_norm": 0.186878964304924, "learning_rate": 1e-06, "loss": -0.0007, "step": 366 }, { "clip_ratio/high_max": 0.0036677171010524035, "clip_ratio/high_mean": 0.001266393628611695, "clip_ratio/low_mean": 0.0010504762421987834, "clip_ratio/low_min": 4.216390516376123e-05, "clip_ratio/region_mean": 0.00231686991173774, "epoch": 0.8562263050452027, "grad_norm": 0.12449093908071518, "learning_rate": 1e-06, "loss": -0.001, "step": 367 }, { "clip_ratio/high_max": 0.004313288489356637, "clip_ratio/high_mean": 0.0016284197299683, "clip_ratio/low_mean": 0.0015851396219659364, "clip_ratio/low_min": 0.00010079435924126301, "clip_ratio/region_mean": 0.0032135592846316285, "epoch": 0.8585593467483231, "grad_norm": 0.10393287986516953, "learning_rate": 1e-06, "loss": -0.0013, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2122.0, "completions/mean_length": 648.8348388671875, "completions/mean_terminated_length": 553.9586791992188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.8608923884514436, "grad_norm": 0.21343426406383514, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 53960359.0, "reward": 0.5948660969734192, "reward_std": 0.2080143243074417, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 369 }, { "clip_ratio/high_max": 0.002667692409886513, "clip_ratio/high_mean": 0.0010972562995448243, "clip_ratio/low_mean": 0.0008936935573728988, "clip_ratio/low_min": 4.289918979338836e-05, "clip_ratio/region_mean": 0.0019909498369088396, "epoch": 0.863225430154564, "grad_norm": 0.1658339500427246, "learning_rate": 1e-06, "loss": 0.012, "step": 370 }, { "clip_ratio/high_max": 0.00338205919979373, "clip_ratio/high_mean": 0.0013968323182780296, "clip_ratio/low_mean": 0.001482483097788645, "clip_ratio/low_min": 7.005133738857694e-05, "clip_ratio/region_mean": 0.002879315390600823, "epoch": 0.8655584718576844, "grad_norm": 0.14531797170639038, "learning_rate": 1e-06, "loss": 0.0116, "step": 371 }, { "clip_ratio/high_max": 0.004199029615847394, "clip_ratio/high_mean": 0.0017772118262655567, "clip_ratio/low_mean": 0.002178777001972776, "clip_ratio/low_min": 5.460065585793927e-05, "clip_ratio/region_mean": 0.00395598889735993, "epoch": 0.8678915135608049, "grad_norm": 0.12013287097215652, "learning_rate": 1e-06, "loss": 0.0112, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 664.1361694335938, "completions/mean_terminated_length": 577.7505493164062, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.8702245552639254, "grad_norm": 0.21239595115184784, "learning_rate": 1e-06, "loss": -0.0129, "num_tokens": 54542065.0, "reward": 0.5602678656578064, "reward_std": 0.1640174239873886, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 373 }, { "clip_ratio/high_max": 0.002019111198023893, "clip_ratio/high_mean": 0.0007497282294934848, "clip_ratio/low_mean": 0.0008690734466654249, "clip_ratio/low_min": 8.964874905359466e-05, "clip_ratio/region_mean": 0.0016188017034437507, "epoch": 0.8725575969670458, "grad_norm": 0.15346002578735352, "learning_rate": 1e-06, "loss": -0.013, "step": 374 }, { "clip_ratio/high_max": 0.0030626643347204663, "clip_ratio/high_mean": 0.0010833644973899936, "clip_ratio/low_mean": 0.001340810580586549, "clip_ratio/low_min": 7.489075323974248e-05, "clip_ratio/region_mean": 0.002424175007035956, "epoch": 0.8748906386701663, "grad_norm": 0.12206783890724182, "learning_rate": 1e-06, "loss": -0.0133, "step": 375 }, { "clip_ratio/high_max": 0.004044111876282841, "clip_ratio/high_mean": 0.0014642214810010046, "clip_ratio/low_mean": 0.0018469086426193826, "clip_ratio/low_min": 0.00020383213268360123, "clip_ratio/region_mean": 0.003311130087240599, "epoch": 0.8772236803732867, "grad_norm": 0.10226547718048096, "learning_rate": 1e-06, "loss": -0.0136, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 715.911865234375, "completions/mean_terminated_length": 602.8523559570312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.8795567220764071, "grad_norm": 0.19563739001750946, "learning_rate": 1e-06, "loss": -0.017, "num_tokens": 55152234.0, "reward": 0.5613839626312256, "reward_std": 0.16965503990650177, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 377 }, { "clip_ratio/high_max": 0.0023589352203998715, "clip_ratio/high_mean": 0.0008785115314822178, "clip_ratio/low_mean": 0.000733127941202838, "clip_ratio/low_min": 1.4328289580589626e-05, "clip_ratio/region_mean": 0.0016116394908749498, "epoch": 0.8818897637795275, "grad_norm": 0.16078226268291473, "learning_rate": 1e-06, "loss": -0.0171, "step": 378 }, { "clip_ratio/high_max": 0.003231900976970792, "clip_ratio/high_mean": 0.0012779439475707477, "clip_ratio/low_mean": 0.001184357148304116, "clip_ratio/low_min": 7.452626960002817e-05, "clip_ratio/region_mean": 0.0024623011631774716, "epoch": 0.884222805482648, "grad_norm": 0.12120595574378967, "learning_rate": 1e-06, "loss": -0.0175, "step": 379 }, { "clip_ratio/high_max": 0.0042905782829620875, "clip_ratio/high_mean": 0.0017402846842742292, "clip_ratio/low_mean": 0.0015743002259114292, "clip_ratio/low_min": 0.00011380644173186738, "clip_ratio/region_mean": 0.003314584930194542, "epoch": 0.8865558471857684, "grad_norm": 0.10652026534080505, "learning_rate": 1e-06, "loss": -0.0177, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2401.0, "completions/mean_length": 685.4342041015625, "completions/mean_terminated_length": 583.5092163085938, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.8888888888888888, "grad_norm": 0.21085689961910248, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 55743343.0, "reward": 0.5848214626312256, "reward_std": 0.20372965931892395, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 381 }, { "clip_ratio/high_max": 0.0026160403358517215, "clip_ratio/high_mean": 0.0011723601382982451, "clip_ratio/low_mean": 0.0008544947077098186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020268548469175585, "epoch": 0.8912219305920094, "grad_norm": 0.1619434952735901, "learning_rate": 1e-06, "loss": -0.0102, "step": 382 }, { "clip_ratio/high_max": 0.0038326323265209794, "clip_ratio/high_mean": 0.0016989545511023607, "clip_ratio/low_mean": 0.001392811160258134, "clip_ratio/low_min": 3.2626349820930045e-05, "clip_ratio/region_mean": 0.0030917656695237383, "epoch": 0.8935549722951298, "grad_norm": 0.13214170932769775, "learning_rate": 1e-06, "loss": -0.0106, "step": 383 }, { "clip_ratio/high_max": 0.004804910233360715, "clip_ratio/high_mean": 0.0021292837845976464, "clip_ratio/low_mean": 0.0019806211439572508, "clip_ratio/low_min": 7.703181836404838e-05, "clip_ratio/region_mean": 0.004109904839424416, "epoch": 0.8958880139982502, "grad_norm": 0.11060050129890442, "learning_rate": 1e-06, "loss": -0.0109, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 651.171875, "completions/mean_terminated_length": 576.5404663085938, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.8982210557013707, "grad_norm": 0.2060902714729309, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 56340265.0, "reward": 0.5725446939468384, "reward_std": 0.18321697413921356, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 385 }, { "clip_ratio/high_max": 0.0023239181173266843, "clip_ratio/high_mean": 0.0009166036816168344, "clip_ratio/low_mean": 0.0009441336515010335, "clip_ratio/low_min": 3.190537427144591e-05, "clip_ratio/region_mean": 0.0018607373494887725, "epoch": 0.9005540974044911, "grad_norm": 0.15475915372371674, "learning_rate": 1e-06, "loss": 0.0082, "step": 386 }, { "clip_ratio/high_max": 0.0031610711448593065, "clip_ratio/high_mean": 0.0012591793965839315, "clip_ratio/low_mean": 0.0013248352297523525, "clip_ratio/low_min": 3.190537427144591e-05, "clip_ratio/region_mean": 0.0025840146772679873, "epoch": 0.9028871391076115, "grad_norm": 0.1257033795118332, "learning_rate": 1e-06, "loss": 0.0079, "step": 387 }, { "clip_ratio/high_max": 0.004017796833068132, "clip_ratio/high_mean": 0.0016208767956413794, "clip_ratio/low_mean": 0.002013769782934105, "clip_ratio/low_min": 3.382034628884867e-05, "clip_ratio/region_mean": 0.0036346465640235692, "epoch": 0.905220180810732, "grad_norm": 0.10506374388933182, "learning_rate": 1e-06, "loss": 0.0076, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 817.7310791015625, "completions/mean_terminated_length": 623.9799194335938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.9075532225138524, "grad_norm": 0.18775033950805664, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 56954080.0, "reward": 0.4888392984867096, "reward_std": 0.17957279086112976, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 389 }, { "clip_ratio/high_max": 0.0019980283359473106, "clip_ratio/high_mean": 0.0008106850982585456, "clip_ratio/low_mean": 0.0007146768366510514, "clip_ratio/low_min": 2.130032044078689e-05, "clip_ratio/region_mean": 0.0015253619130817242, "epoch": 0.9098862642169728, "grad_norm": 0.15038326382637024, "learning_rate": 1e-06, "loss": -0.0037, "step": 390 }, { "clip_ratio/high_max": 0.0029274467087816447, "clip_ratio/high_mean": 0.001136054737798986, "clip_ratio/low_mean": 0.0010606899468257325, "clip_ratio/low_min": 5.737617357226554e-05, "clip_ratio/region_mean": 0.0021967447173665278, "epoch": 0.9122193059200934, "grad_norm": 0.12011077255010605, "learning_rate": 1e-06, "loss": -0.004, "step": 391 }, { "clip_ratio/high_max": 0.0039614914858248085, "clip_ratio/high_mean": 0.0014890884231135715, "clip_ratio/low_mean": 0.0015351798574556597, "clip_ratio/low_min": 0.0001075335603673011, "clip_ratio/region_mean": 0.00302426828420721, "epoch": 0.9145523476232138, "grad_norm": 0.09603478014469147, "learning_rate": 1e-06, "loss": -0.0042, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3268.0, "completions/mean_length": 710.7767944335938, "completions/mean_terminated_length": 589.4566040039062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.9168853893263342, "grad_norm": 0.2012801617383957, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 57563176.0, "reward": 0.5100446939468384, "reward_std": 0.18306221067905426, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 393 }, { "clip_ratio/high_max": 0.0022089789563324302, "clip_ratio/high_mean": 0.0009703058403829345, "clip_ratio/low_mean": 0.0007883378602855373, "clip_ratio/low_min": 8.753387191973161e-05, "clip_ratio/region_mean": 0.001758643651555758, "epoch": 0.9192184310294547, "grad_norm": 0.1570790559053421, "learning_rate": 1e-06, "loss": 0.0129, "step": 394 }, { "clip_ratio/high_max": 0.0027049439449911006, "clip_ratio/high_mean": 0.0012697697820840403, "clip_ratio/low_mean": 0.0012182189584564185, "clip_ratio/low_min": 0.00012845540913986042, "clip_ratio/region_mean": 0.002487988764187321, "epoch": 0.9215514727325751, "grad_norm": 0.12965305149555206, "learning_rate": 1e-06, "loss": 0.0126, "step": 395 }, { "clip_ratio/high_max": 0.0037276651346473955, "clip_ratio/high_mean": 0.0016599319969827775, "clip_ratio/low_mean": 0.0019110107932647225, "clip_ratio/low_min": 0.00022249330504564568, "clip_ratio/region_mean": 0.003570942753867712, "epoch": 0.9238845144356955, "grad_norm": 0.10834057629108429, "learning_rate": 1e-06, "loss": 0.0122, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 733.1138916015625, "completions/mean_terminated_length": 600.470947265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.926217556138816, "grad_norm": 0.1775915026664734, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 58171046.0, "reward": 0.5870535969734192, "reward_std": 0.16450344026088715, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 397 }, { "clip_ratio/high_max": 0.002447315797326155, "clip_ratio/high_mean": 0.000885686073161196, "clip_ratio/low_mean": 0.0007784778263157932, "clip_ratio/low_min": 1.3691128515347373e-05, "clip_ratio/region_mean": 0.0016641639231238514, "epoch": 0.9285505978419364, "grad_norm": 0.16023185849189758, "learning_rate": 1e-06, "loss": -0.0026, "step": 398 }, { "clip_ratio/high_max": 0.002973697461129632, "clip_ratio/high_mean": 0.0011252460244577378, "clip_ratio/low_mean": 0.001211413720739074, "clip_ratio/low_min": 3.149408075842075e-05, "clip_ratio/region_mean": 0.0023366597379208542, "epoch": 0.9308836395450568, "grad_norm": 0.11900816857814789, "learning_rate": 1e-06, "loss": -0.0029, "step": 399 }, { "clip_ratio/high_max": 0.0036114540707785636, "clip_ratio/high_mean": 0.0014400892468984239, "clip_ratio/low_mean": 0.0017501140719105024, "clip_ratio/low_min": 7.46179575799033e-05, "clip_ratio/region_mean": 0.003190203358826693, "epoch": 0.9332166812481774, "grad_norm": 0.10340147465467453, "learning_rate": 1e-06, "loss": -0.0032, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3191.0, "completions/mean_length": 678.8404541015625, "completions/mean_terminated_length": 568.6094360351562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.9355497229512978, "grad_norm": 0.19146040081977844, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 58750119.0, "reward": 0.598214328289032, "reward_std": 0.1705547422170639, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 401 }, { "clip_ratio/high_max": 0.0027311747180647217, "clip_ratio/high_mean": 0.0010011168033088325, "clip_ratio/low_mean": 0.0009044360249390593, "clip_ratio/low_min": 0.00011685471326927654, "clip_ratio/region_mean": 0.001905552842799807, "epoch": 0.9378827646544182, "grad_norm": 0.15168845653533936, "learning_rate": 1e-06, "loss": 0.0086, "step": 402 }, { "clip_ratio/high_max": 0.003238987599615939, "clip_ratio/high_mean": 0.001256776216905564, "clip_ratio/low_mean": 0.001312849057285348, "clip_ratio/low_min": 0.00018910317157860845, "clip_ratio/region_mean": 0.0025696252268971875, "epoch": 0.9402158063575387, "grad_norm": 0.12434040755033493, "learning_rate": 1e-06, "loss": 0.0082, "step": 403 }, { "clip_ratio/high_max": 0.004443838945007883, "clip_ratio/high_mean": 0.0016180718666873872, "clip_ratio/low_mean": 0.0019500963353493717, "clip_ratio/low_min": 0.00018892160915129352, "clip_ratio/region_mean": 0.00356816819839878, "epoch": 0.9425488480606591, "grad_norm": 0.10805650800466537, "learning_rate": 1e-06, "loss": 0.0079, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 685.5569458007812, "completions/mean_terminated_length": 587.668212890625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.9448818897637795, "grad_norm": 0.20526070892810822, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 59361890.0, "reward": 0.5613839626312256, "reward_std": 0.18614405393600464, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 405 }, { "clip_ratio/high_max": 0.002885247733502183, "clip_ratio/high_mean": 0.0010342465939174872, "clip_ratio/low_mean": 0.0009991056504077278, "clip_ratio/low_min": 5.837020216858946e-05, "clip_ratio/region_mean": 0.002033352247963194, "epoch": 0.9472149314669, "grad_norm": 0.15546680986881256, "learning_rate": 1e-06, "loss": -0.0036, "step": 406 }, { "clip_ratio/high_max": 0.0041122128095594235, "clip_ratio/high_mean": 0.0013555393306887709, "clip_ratio/low_mean": 0.0015122175609576516, "clip_ratio/low_min": 9.730963756737765e-05, "clip_ratio/region_mean": 0.0028677568770945072, "epoch": 0.9495479731700204, "grad_norm": 0.13455148041248322, "learning_rate": 1e-06, "loss": -0.004, "step": 407 }, { "clip_ratio/high_max": 0.0054704915746697225, "clip_ratio/high_mean": 0.0018156521364289802, "clip_ratio/low_mean": 0.0021746665879618376, "clip_ratio/low_min": 0.00016912786304601468, "clip_ratio/region_mean": 0.003990318757132627, "epoch": 0.9518810148731408, "grad_norm": 0.11528573930263519, "learning_rate": 1e-06, "loss": -0.0043, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 809.4542846679688, "completions/mean_terminated_length": 639.7265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.9542140565762613, "grad_norm": 0.18671628832817078, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 59994401.0, "reward": 0.5379464626312256, "reward_std": 0.1624750792980194, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 409 }, { "clip_ratio/high_max": 0.00202916701164213, "clip_ratio/high_mean": 0.0007917138300399529, "clip_ratio/low_mean": 0.0008262629071396077, "clip_ratio/low_min": 9.870728899841197e-05, "clip_ratio/region_mean": 0.0016179767408175394, "epoch": 0.9565470982793818, "grad_norm": 0.14466002583503723, "learning_rate": 1e-06, "loss": -0.0168, "step": 410 }, { "clip_ratio/high_max": 0.0027568448640522547, "clip_ratio/high_mean": 0.0010336395534977783, "clip_ratio/low_mean": 0.001176010860945098, "clip_ratio/low_min": 0.00017182665396830998, "clip_ratio/region_mean": 0.002209650439908728, "epoch": 0.9588801399825022, "grad_norm": 0.11883289366960526, "learning_rate": 1e-06, "loss": -0.0171, "step": 411 }, { "clip_ratio/high_max": 0.003965290539781563, "clip_ratio/high_mean": 0.001435778867744375, "clip_ratio/low_mean": 0.0016489656372868922, "clip_ratio/low_min": 0.00021331881362129934, "clip_ratio/region_mean": 0.0030847444650135003, "epoch": 0.9612131816856226, "grad_norm": 0.10178620368242264, "learning_rate": 1e-06, "loss": -0.0173, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3751.0, "completions/mean_length": 657.2489013671875, "completions/mean_terminated_length": 606.6217041015625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.9635462233887431, "grad_norm": 0.2038157731294632, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 60622992.0, "reward": 0.6194196939468384, "reward_std": 0.21388129889965057, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 413 }, { "clip_ratio/high_max": 0.002602387889055535, "clip_ratio/high_mean": 0.0011320918820274528, "clip_ratio/low_mean": 0.0011194719845661893, "clip_ratio/low_min": 0.00022245531363296323, "clip_ratio/region_mean": 0.002251563884783536, "epoch": 0.9658792650918635, "grad_norm": 0.16812457144260406, "learning_rate": 1e-06, "loss": 0.0033, "step": 414 }, { "clip_ratio/high_max": 0.0034457334913895465, "clip_ratio/high_mean": 0.0014259210729505867, "clip_ratio/low_mean": 0.0015751544342492707, "clip_ratio/low_min": 0.0003133871505269781, "clip_ratio/region_mean": 0.0030010755290277302, "epoch": 0.9682123067949839, "grad_norm": 0.1362789273262024, "learning_rate": 1e-06, "loss": 0.0029, "step": 415 }, { "clip_ratio/high_max": 0.004355253950052429, "clip_ratio/high_mean": 0.0018887740661739372, "clip_ratio/low_mean": 0.0022385373813449405, "clip_ratio/low_min": 0.00042794342880370095, "clip_ratio/region_mean": 0.0041273114329669625, "epoch": 0.9705453484981044, "grad_norm": 0.11682573705911636, "learning_rate": 1e-06, "loss": 0.0025, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 647.052490234375, "completions/mean_terminated_length": 556.186767578125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.9728783902012248, "grad_norm": 0.20953932404518127, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 61196439.0, "reward": 0.5457589626312256, "reward_std": 0.1935766041278839, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 417 }, { "clip_ratio/high_max": 0.002564146889199037, "clip_ratio/high_mean": 0.0010276731045451015, "clip_ratio/low_mean": 0.0009804080254980363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020080811591469683, "epoch": 0.9752114319043453, "grad_norm": 0.16335834562778473, "learning_rate": 1e-06, "loss": -0.002, "step": 418 }, { "clip_ratio/high_max": 0.003282769837824162, "clip_ratio/high_mean": 0.0013788585674774367, "clip_ratio/low_mean": 0.0014575193636119366, "clip_ratio/low_min": 6.77219832141418e-05, "clip_ratio/region_mean": 0.0028363778546918184, "epoch": 0.9775444736074658, "grad_norm": 0.13176748156547546, "learning_rate": 1e-06, "loss": -0.0024, "step": 419 }, { "clip_ratio/high_max": 0.004400962279760279, "clip_ratio/high_mean": 0.0018007998769462574, "clip_ratio/low_mean": 0.00214578011946287, "clip_ratio/low_min": 6.947611109353602e-05, "clip_ratio/region_mean": 0.003946580007323064, "epoch": 0.9798775153105862, "grad_norm": 0.11315272748470306, "learning_rate": 1e-06, "loss": -0.0027, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 647.8515625, "completions/mean_terminated_length": 561.0560302734375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.9822105570137066, "grad_norm": 0.21322235465049744, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 61768994.0, "reward": 0.5647321939468384, "reward_std": 0.16345219314098358, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 421 }, { "clip_ratio/high_max": 0.0022108581761131063, "clip_ratio/high_mean": 0.0007710883292020299, "clip_ratio/low_mean": 0.0009346009537694044, "clip_ratio/low_min": 1.2577983397932258e-05, "clip_ratio/region_mean": 0.0017056892975233495, "epoch": 0.9845435987168271, "grad_norm": 0.15555015206336975, "learning_rate": 1e-06, "loss": -0.0002, "step": 422 }, { "clip_ratio/high_max": 0.0031601890550518874, "clip_ratio/high_mean": 0.0010656489703251282, "clip_ratio/low_mean": 0.0014062915288377553, "clip_ratio/low_min": 5.9959162172162905e-05, "clip_ratio/region_mean": 0.0024719405046198517, "epoch": 0.9868766404199475, "grad_norm": 0.12208113074302673, "learning_rate": 1e-06, "loss": -0.0006, "step": 423 }, { "clip_ratio/high_max": 0.0044620617190958, "clip_ratio/high_mean": 0.0014520882214128505, "clip_ratio/low_mean": 0.00199620021157898, "clip_ratio/low_min": 8.458552474621683e-05, "clip_ratio/region_mean": 0.0034482883638702333, "epoch": 0.9892096821230679, "grad_norm": 0.10144519805908203, "learning_rate": 1e-06, "loss": -0.0008, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 716.9308471679688, "completions/mean_terminated_length": 567.2750854492188, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.9915427238261884, "grad_norm": 0.21655309200286865, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 62352220.0, "reward": 0.5267857313156128, "reward_std": 0.19997519254684448, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 425 }, { "clip_ratio/high_max": 0.002403864353254903, "clip_ratio/high_mean": 0.00097228530466964, "clip_ratio/low_mean": 0.000971529574599117, "clip_ratio/low_min": 4.872150930168573e-05, "clip_ratio/region_mean": 0.001943814946571365, "epoch": 0.9938757655293088, "grad_norm": 0.16491401195526123, "learning_rate": 1e-06, "loss": -0.0131, "step": 426 }, { "clip_ratio/high_max": 0.003585181388189085, "clip_ratio/high_mean": 0.0014034464475116692, "clip_ratio/low_mean": 0.0014284370045061223, "clip_ratio/low_min": 8.967983194452245e-05, "clip_ratio/region_mean": 0.0028318834956735373, "epoch": 0.9962088072324293, "grad_norm": 0.12873291969299316, "learning_rate": 1e-06, "loss": -0.0134, "step": 427 }, { "clip_ratio/high_max": 0.004441768382093869, "clip_ratio/high_mean": 0.0018141897635359783, "clip_ratio/low_mean": 0.0021208556281635538, "clip_ratio/low_min": 0.00014593701780540869, "clip_ratio/region_mean": 0.003935045417165384, "epoch": 0.9985418489355498, "grad_norm": 0.11089959740638733, "learning_rate": 1e-06, "loss": -0.0138, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 648.7131958007812, "completions/mean_terminated_length": 545.6907958984375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.0023330417031204, "grad_norm": 0.2355014681816101, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 62917395.0, "reward": 0.6194196939468384, "reward_std": 0.20793874561786652, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 429 }, { "clip_ratio/high_max": 0.0025678299280116335, "clip_ratio/high_mean": 0.0011635770388238598, "clip_ratio/low_mean": 0.0009015460582304513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020651230370276608, "epoch": 1.0046660834062409, "grad_norm": 0.17264880239963531, "learning_rate": 1e-06, "loss": -0.0063, "step": 430 }, { "clip_ratio/high_max": 0.0034158040798502043, "clip_ratio/high_mean": 0.0016181860264623538, "clip_ratio/low_mean": 0.0014406662849069107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030588523513870314, "epoch": 1.0069991251093613, "grad_norm": 0.13857561349868774, "learning_rate": 1e-06, "loss": -0.0067, "step": 431 }, { "clip_ratio/high_max": 0.004653934563975781, "clip_ratio/high_mean": 0.0021315110570867546, "clip_ratio/low_mean": 0.002095640593324788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004227151730447076, "epoch": 1.0093321668124817, "grad_norm": 0.10838635265827179, "learning_rate": 1e-06, "loss": -0.0071, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 668.75, "completions/mean_terminated_length": 545.9237060546875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.0116652085156022, "grad_norm": 0.20345765352249146, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 63486067.0, "reward": 0.6160714626312256, "reward_std": 0.15660808980464935, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 433 }, { "clip_ratio/high_max": 0.0024225516244769096, "clip_ratio/high_mean": 0.0009241931438737083, "clip_ratio/low_mean": 0.0008500129588355776, "clip_ratio/low_min": 1.5269974028342403e-05, "clip_ratio/region_mean": 0.0017742060954333283, "epoch": 1.0139982502187226, "grad_norm": 0.16214466094970703, "learning_rate": 1e-06, "loss": -0.01, "step": 434 }, { "clip_ratio/high_max": 0.0033991930831689388, "clip_ratio/high_mean": 0.0013122124219080433, "clip_ratio/low_mean": 0.0013200318353483453, "clip_ratio/low_min": 8.488964522257447e-05, "clip_ratio/region_mean": 0.002632244271808304, "epoch": 1.016331291921843, "grad_norm": 0.12308251112699509, "learning_rate": 1e-06, "loss": -0.0103, "step": 435 }, { "clip_ratio/high_max": 0.004655162832932547, "clip_ratio/high_mean": 0.0017371050198562443, "clip_ratio/low_mean": 0.0018231674475828186, "clip_ratio/low_min": 8.488964522257447e-05, "clip_ratio/region_mean": 0.003560272467439063, "epoch": 1.0186643336249634, "grad_norm": 0.10307466983795166, "learning_rate": 1e-06, "loss": -0.0106, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 728.044677734375, "completions/mean_terminated_length": 587.0604858398438, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 1.020997375328084, "grad_norm": 0.2075859159231186, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 64083939.0, "reward": 0.5658482313156128, "reward_std": 0.1734876036643982, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 437 }, { "clip_ratio/high_max": 0.0027714637690223753, "clip_ratio/high_mean": 0.0009435021602257621, "clip_ratio/low_mean": 0.0009021788664540509, "clip_ratio/low_min": 5.918410897720605e-05, "clip_ratio/region_mean": 0.00184568103577476, "epoch": 1.0233304170312045, "grad_norm": 0.15365564823150635, "learning_rate": 1e-06, "loss": -0.017, "step": 438 }, { "clip_ratio/high_max": 0.004012125398730859, "clip_ratio/high_mean": 0.0012697527272393927, "clip_ratio/low_mean": 0.0013391255470196484, "clip_ratio/low_min": 9.317967851529829e-05, "clip_ratio/region_mean": 0.0026088782542501576, "epoch": 1.025663458734325, "grad_norm": 0.13083483278751373, "learning_rate": 1e-06, "loss": -0.0174, "step": 439 }, { "clip_ratio/high_max": 0.005090185011795256, "clip_ratio/high_mean": 0.001745759065670427, "clip_ratio/low_mean": 0.001986657236557221, "clip_ratio/low_min": 0.0001129777156165801, "clip_ratio/region_mean": 0.003732416211278178, "epoch": 1.0279965004374454, "grad_norm": 0.11261268705129623, "learning_rate": 1e-06, "loss": -0.0177, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 722.7142944335938, "completions/mean_terminated_length": 573.314697265625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 1.0303295421405658, "grad_norm": 0.21606624126434326, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 64680219.0, "reward": 0.5446428656578064, "reward_std": 0.16401740908622742, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 441 }, { "clip_ratio/high_max": 0.0025396077398909256, "clip_ratio/high_mean": 0.0009883620350592537, "clip_ratio/low_mean": 0.0010189603544858983, "clip_ratio/low_min": 4.460460877453443e-05, "clip_ratio/region_mean": 0.0020073223786312155, "epoch": 1.0326625838436863, "grad_norm": 0.16001273691654205, "learning_rate": 1e-06, "loss": 0.0011, "step": 442 }, { "clip_ratio/high_max": 0.003260525998484809, "clip_ratio/high_mean": 0.0013401331052591559, "clip_ratio/low_mean": 0.0015497480308113154, "clip_ratio/low_min": 0.00010330160421290202, "clip_ratio/region_mean": 0.002889881143346429, "epoch": 1.0349956255468067, "grad_norm": 0.1283690184354782, "learning_rate": 1e-06, "loss": 0.0008, "step": 443 }, { "clip_ratio/high_max": 0.0040881314416765235, "clip_ratio/high_mean": 0.0016648460550641175, "clip_ratio/low_mean": 0.0021845515657332726, "clip_ratio/low_min": 0.00024445776944048703, "clip_ratio/region_mean": 0.003849397544399835, "epoch": 1.0373286672499271, "grad_norm": 0.10615712404251099, "learning_rate": 1e-06, "loss": 0.0005, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 581.9364013671875, "completions/mean_terminated_length": 534.2341918945312, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 1.0396617089530475, "grad_norm": 0.21255074441432953, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 65247058.0, "reward": 0.660714328289032, "reward_std": 0.15649932622909546, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 445 }, { "clip_ratio/high_max": 0.002600744614028372, "clip_ratio/high_mean": 0.0010431954797240905, "clip_ratio/low_mean": 0.0005680154295077955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016112109005916864, "epoch": 1.041994750656168, "grad_norm": 0.15392909944057465, "learning_rate": 1e-06, "loss": -0.001, "step": 446 }, { "clip_ratio/high_max": 0.0038975388670223765, "clip_ratio/high_mean": 0.0014430998744501267, "clip_ratio/low_mean": 0.0010503042367417947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002493404143024236, "epoch": 1.0443277923592884, "grad_norm": 0.1171441599726677, "learning_rate": 1e-06, "loss": -0.0014, "step": 447 }, { "clip_ratio/high_max": 0.0043462939211167395, "clip_ratio/high_mean": 0.0017867828646558337, "clip_ratio/low_mean": 0.0014104585334280273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003197241443558596, "epoch": 1.0466608340624088, "grad_norm": 0.1055760383605957, "learning_rate": 1e-06, "loss": -0.0016, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 661.0748291015625, "completions/mean_terminated_length": 582.6517944335938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.0489938757655293, "grad_norm": 0.19000589847564697, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 65848093.0, "reward": 0.609375, "reward_std": 0.1326035112142563, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 449 }, { "clip_ratio/high_max": 0.0020253730835975148, "clip_ratio/high_mean": 0.0008690582762937993, "clip_ratio/low_mean": 0.0007496903099308838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016187485780392308, "epoch": 1.0513269174686497, "grad_norm": 0.1457156389951706, "learning_rate": 1e-06, "loss": -0.0145, "step": 450 }, { "clip_ratio/high_max": 0.00266711697622668, "clip_ratio/high_mean": 0.0011617431591730565, "clip_ratio/low_mean": 0.0010922782557827304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022540214704349637, "epoch": 1.0536599591717701, "grad_norm": 0.11161735653877258, "learning_rate": 1e-06, "loss": -0.0148, "step": 451 }, { "clip_ratio/high_max": 0.0037150537536945194, "clip_ratio/high_mean": 0.0015533559671894182, "clip_ratio/low_mean": 0.001505238870777248, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003058594898902811, "epoch": 1.0559930008748906, "grad_norm": 0.09098061919212341, "learning_rate": 1e-06, "loss": -0.015, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2158.0, "completions/mean_length": 676.1551513671875, "completions/mean_terminated_length": 561.765869140625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.058326042578011, "grad_norm": 0.21457985043525696, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 66446592.0, "reward": 0.5267857313156128, "reward_std": 0.16322463750839233, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 453 }, { "clip_ratio/high_max": 0.0028716153974528424, "clip_ratio/high_mean": 0.0010389866856712615, "clip_ratio/low_mean": 0.0009301038044213783, "clip_ratio/low_min": 1.6344141840818338e-05, "clip_ratio/region_mean": 0.001969090517377481, "epoch": 1.0606590842811314, "grad_norm": 0.15919674932956696, "learning_rate": 1e-06, "loss": 0.0009, "step": 454 }, { "clip_ratio/high_max": 0.003920428433048073, "clip_ratio/high_mean": 0.0013767091986665037, "clip_ratio/low_mean": 0.0013608229055535048, "clip_ratio/low_min": 3.393051156308502e-05, "clip_ratio/region_mean": 0.002737532093306072, "epoch": 1.0629921259842519, "grad_norm": 0.12705349922180176, "learning_rate": 1e-06, "loss": 0.0006, "step": 455 }, { "clip_ratio/high_max": 0.004810141050256789, "clip_ratio/high_mean": 0.001804948267817963, "clip_ratio/low_mean": 0.0019217152948840521, "clip_ratio/low_min": 5.089576370664872e-05, "clip_ratio/region_mean": 0.003726663562702015, "epoch": 1.0653251676873725, "grad_norm": 0.1068810224533081, "learning_rate": 1e-06, "loss": 0.0003, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3395.0, "completions/mean_length": 658.078125, "completions/mean_terminated_length": 567.5028686523438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.067658209390493, "grad_norm": 0.21178612112998962, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 67029934.0, "reward": 0.5736607313156128, "reward_std": 0.15300673246383667, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 457 }, { "clip_ratio/high_max": 0.0022588237916352227, "clip_ratio/high_mean": 0.0008791460604697932, "clip_ratio/low_mean": 0.0007500107712985482, "clip_ratio/low_min": 1.198695827042684e-05, "clip_ratio/region_mean": 0.0016291568099404685, "epoch": 1.0699912510936134, "grad_norm": 0.16025596857070923, "learning_rate": 1e-06, "loss": 0.0064, "step": 458 }, { "clip_ratio/high_max": 0.0030497866682708263, "clip_ratio/high_mean": 0.0011408918890083442, "clip_ratio/low_mean": 0.0011933429959753994, "clip_ratio/low_min": 4.049676135764457e-05, "clip_ratio/region_mean": 0.002334234901354648, "epoch": 1.0723242927967338, "grad_norm": 0.122371144592762, "learning_rate": 1e-06, "loss": 0.0061, "step": 459 }, { "clip_ratio/high_max": 0.004076709185028449, "clip_ratio/high_mean": 0.001511773054517107, "clip_ratio/low_mean": 0.001708974228677107, "clip_ratio/low_min": 3.596087481128052e-05, "clip_ratio/region_mean": 0.0032207472395384684, "epoch": 1.0746573344998542, "grad_norm": 0.09936907142400742, "learning_rate": 1e-06, "loss": 0.0058, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 652.505615234375, "completions/mean_terminated_length": 581.9100341796875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.0769903762029747, "grad_norm": 0.2032153457403183, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 67628691.0, "reward": 0.6651785969734192, "reward_std": 0.15728957951068878, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 461 }, { "clip_ratio/high_max": 0.001916628927574493, "clip_ratio/high_mean": 0.0008462735204375349, "clip_ratio/low_mean": 0.0009258620812033769, "clip_ratio/low_min": 3.087944787694141e-05, "clip_ratio/region_mean": 0.0017721356489346363, "epoch": 1.079323417906095, "grad_norm": 0.15796014666557312, "learning_rate": 1e-06, "loss": -0.0084, "step": 462 }, { "clip_ratio/high_max": 0.0026852015434997156, "clip_ratio/high_mean": 0.0011735382558981655, "clip_ratio/low_mean": 0.0013067050731478957, "clip_ratio/low_min": 6.175889575388283e-05, "clip_ratio/region_mean": 0.0024802433254080825, "epoch": 1.0816564596092155, "grad_norm": 0.13092151284217834, "learning_rate": 1e-06, "loss": -0.0087, "step": 463 }, { "clip_ratio/high_max": 0.0031940455082803965, "clip_ratio/high_mean": 0.001424788912117947, "clip_ratio/low_mean": 0.0018441887696099002, "clip_ratio/low_min": 6.754323112545535e-05, "clip_ratio/region_mean": 0.003268977758125402, "epoch": 1.083989501312336, "grad_norm": 0.10226713865995407, "learning_rate": 1e-06, "loss": -0.009, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 617.9330444335938, "completions/mean_terminated_length": 554.6954345703125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.0863225430154564, "grad_norm": 0.21115198731422424, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 68201943.0, "reward": 0.6116071939468384, "reward_std": 0.15642264485359192, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 465 }, { "clip_ratio/high_max": 0.0023514752901974134, "clip_ratio/high_mean": 0.0008778714036452584, "clip_ratio/low_mean": 0.000709651594661409, "clip_ratio/low_min": 4.033125514979474e-05, "clip_ratio/region_mean": 0.001587523005582625, "epoch": 1.0886555847185768, "grad_norm": 0.1636190563440323, "learning_rate": 1e-06, "loss": 0.0109, "step": 466 }, { "clip_ratio/high_max": 0.002951555241452297, "clip_ratio/high_mean": 0.0011870932075908058, "clip_ratio/low_mean": 0.0010537585549172945, "clip_ratio/low_min": 6.163716898299754e-05, "clip_ratio/region_mean": 0.0022408517106669024, "epoch": 1.0909886264216972, "grad_norm": 0.11352956295013428, "learning_rate": 1e-06, "loss": 0.0106, "step": 467 }, { "clip_ratio/high_max": 0.003606210957514122, "clip_ratio/high_mean": 0.0015339023157139309, "clip_ratio/low_mean": 0.0015272204727807548, "clip_ratio/low_min": 8.920070831663907e-05, "clip_ratio/region_mean": 0.003061122784856707, "epoch": 1.0933216681248177, "grad_norm": 0.09395275264978409, "learning_rate": 1e-06, "loss": 0.0104, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3194.0, "completions/mean_length": 721.4185791015625, "completions/mean_terminated_length": 563.727783203125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.0956547098279381, "grad_norm": 0.19188307225704193, "learning_rate": 1e-06, "loss": -0.0116, "num_tokens": 68758846.0, "reward": 0.660714328289032, "reward_std": 0.15968744456768036, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 469 }, { "clip_ratio/high_max": 0.002506887714844197, "clip_ratio/high_mean": 0.0009400933595316019, "clip_ratio/low_mean": 0.0006335680136544397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001573661393194925, "epoch": 1.0979877515310585, "grad_norm": 0.14592349529266357, "learning_rate": 1e-06, "loss": -0.0117, "step": 470 }, { "clip_ratio/high_max": 0.0032938888616627082, "clip_ratio/high_mean": 0.0012763892955263145, "clip_ratio/low_mean": 0.000980786191576044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002257175525301136, "epoch": 1.100320793234179, "grad_norm": 0.11236696690320969, "learning_rate": 1e-06, "loss": -0.0119, "step": 471 }, { "clip_ratio/high_max": 0.003995928855147213, "clip_ratio/high_mean": 0.001544177379400935, "clip_ratio/low_mean": 0.0013732063707720954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029173837538110092, "epoch": 1.1026538349372994, "grad_norm": 0.09258055686950684, "learning_rate": 1e-06, "loss": -0.0121, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 630.0357666015625, "completions/mean_terminated_length": 526.4552001953125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 1.10498687664042, "grad_norm": 0.22358477115631104, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 69290486.0, "reward": 0.6774553656578064, "reward_std": 0.14710471034049988, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 473 }, { "clip_ratio/high_max": 0.002306347931153141, "clip_ratio/high_mean": 0.0008877994350768859, "clip_ratio/low_mean": 0.0007401595989904308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001627959052711958, "epoch": 1.1073199183435405, "grad_norm": 0.15918849408626556, "learning_rate": 1e-06, "loss": -0.0046, "step": 474 }, { "clip_ratio/high_max": 0.0034575856625451706, "clip_ratio/high_mean": 0.0013134165301380563, "clip_ratio/low_mean": 0.0012974281617061934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026108447200385854, "epoch": 1.109652960046661, "grad_norm": 0.12264557927846909, "learning_rate": 1e-06, "loss": -0.0049, "step": 475 }, { "clip_ratio/high_max": 0.0045041953708278015, "clip_ratio/high_mean": 0.0016967974843282718, "clip_ratio/low_mean": 0.0019061095208599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003602907083404716, "epoch": 1.1119860017497813, "grad_norm": 0.0992920845746994, "learning_rate": 1e-06, "loss": -0.0051, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2169.0, "completions/mean_length": 683.8035888671875, "completions/mean_terminated_length": 573.7327270507812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.1143190434529018, "grad_norm": 0.22281140089035034, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 69885126.0, "reward": 0.606026828289032, "reward_std": 0.18798920512199402, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 477 }, { "clip_ratio/high_max": 0.002515683831006754, "clip_ratio/high_mean": 0.001098281816666713, "clip_ratio/low_mean": 0.0009356910140922992, "clip_ratio/low_min": 3.162955545121804e-05, "clip_ratio/region_mean": 0.002033972821664065, "epoch": 1.1166520851560222, "grad_norm": 0.1639271080493927, "learning_rate": 1e-06, "loss": -0.0077, "step": 478 }, { "clip_ratio/high_max": 0.003652712191978935, "clip_ratio/high_mean": 0.0015059465404192451, "clip_ratio/low_mean": 0.0013621303369291127, "clip_ratio/low_min": 6.325911090243608e-05, "clip_ratio/region_mean": 0.0028680768227786757, "epoch": 1.1189851268591426, "grad_norm": 0.13559524714946747, "learning_rate": 1e-06, "loss": -0.008, "step": 479 }, { "clip_ratio/high_max": 0.00480717200844083, "clip_ratio/high_mean": 0.0019393003713048529, "clip_ratio/low_mean": 0.001997923416638514, "clip_ratio/low_min": 0.00010096930782310665, "clip_ratio/region_mean": 0.00393722380977124, "epoch": 1.121318168562263, "grad_norm": 0.11107579618692398, "learning_rate": 1e-06, "loss": -0.0083, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3227.0, "completions/mean_length": 692.5346069335938, "completions/mean_terminated_length": 570.5606689453125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.1236512102653835, "grad_norm": 0.22359482944011688, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 70475861.0, "reward": 0.5703125, "reward_std": 0.1617264747619629, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 481 }, { "clip_ratio/high_max": 0.0026844485255423933, "clip_ratio/high_mean": 0.0010383818662376143, "clip_ratio/low_mean": 0.0009371188516524853, "clip_ratio/low_min": 2.11291408049874e-05, "clip_ratio/region_mean": 0.0019755006942432374, "epoch": 1.125984251968504, "grad_norm": 0.20877031981945038, "learning_rate": 1e-06, "loss": -0.0089, "step": 482 }, { "clip_ratio/high_max": 0.0038317930084303953, "clip_ratio/high_mean": 0.0014030474012542982, "clip_ratio/low_mean": 0.0014988795264798682, "clip_ratio/low_min": 5.40116016054526e-05, "clip_ratio/region_mean": 0.0029019269786658697, "epoch": 1.1283172936716244, "grad_norm": 0.1356377899646759, "learning_rate": 1e-06, "loss": -0.0092, "step": 483 }, { "clip_ratio/high_max": 0.00455052824690938, "clip_ratio/high_mean": 0.0016818577205413021, "clip_ratio/low_mean": 0.0020646882621804252, "clip_ratio/low_min": 9.190334822051227e-05, "clip_ratio/region_mean": 0.003746545989997685, "epoch": 1.1306503353747448, "grad_norm": 0.11094866693019867, "learning_rate": 1e-06, "loss": -0.0095, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 634.466552734375, "completions/mean_terminated_length": 547.3340454101562, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 1.1329833770778652, "grad_norm": 0.2206052839756012, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 71032799.0, "reward": 0.6618303656578064, "reward_std": 0.15018658339977264, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 485 }, { "clip_ratio/high_max": 0.0021790850041725207, "clip_ratio/high_mean": 0.0009035823204612825, "clip_ratio/low_mean": 0.0008458992779196706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017494815692771226, "epoch": 1.1353164187809857, "grad_norm": 0.16208414733409882, "learning_rate": 1e-06, "loss": -0.0072, "step": 486 }, { "clip_ratio/high_max": 0.0028241389663890004, "clip_ratio/high_mean": 0.0011764140108425636, "clip_ratio/low_mean": 0.0012130580107623246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023894719925010577, "epoch": 1.137649460484106, "grad_norm": 0.11980120837688446, "learning_rate": 1e-06, "loss": -0.0075, "step": 487 }, { "clip_ratio/high_max": 0.0037201778395683505, "clip_ratio/high_mean": 0.001559718257340137, "clip_ratio/low_mean": 0.0017528375719848555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033125558111350983, "epoch": 1.1399825021872265, "grad_norm": 0.097875215113163, "learning_rate": 1e-06, "loss": -0.0077, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 689.7913208007812, "completions/mean_terminated_length": 583.959716796875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 1.142315543890347, "grad_norm": 0.2403462678194046, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 71630348.0, "reward": 0.5993303656578064, "reward_std": 0.18855473399162292, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 489 }, { "clip_ratio/high_max": 0.002373559618717991, "clip_ratio/high_mean": 0.000944201834499836, "clip_ratio/low_mean": 0.001128559266362572, "clip_ratio/low_min": 2.9725268177571706e-05, "clip_ratio/region_mean": 0.002072761068120599, "epoch": 1.1446485855934676, "grad_norm": 0.16639593243598938, "learning_rate": 1e-06, "loss": 0.0011, "step": 490 }, { "clip_ratio/high_max": 0.0030215455044526607, "clip_ratio/high_mean": 0.001276046212296933, "clip_ratio/low_mean": 0.0017108348438341636, "clip_ratio/low_min": 4.8676010919734836e-05, "clip_ratio/region_mean": 0.002986881081596948, "epoch": 1.1469816272965878, "grad_norm": 0.13124065101146698, "learning_rate": 1e-06, "loss": 0.0007, "step": 491 }, { "clip_ratio/high_max": 0.0038660251448163763, "clip_ratio/high_mean": 0.0016847834413056262, "clip_ratio/low_mean": 0.002494680287782103, "clip_ratio/low_min": 9.684655015007593e-05, "clip_ratio/region_mean": 0.004179463678156026, "epoch": 1.1493146689997085, "grad_norm": 0.10689432919025421, "learning_rate": 1e-06, "loss": 0.0004, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2174.0, "completions/mean_length": 609.2064819335938, "completions/mean_terminated_length": 561.8744506835938, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 1.151647710702829, "grad_norm": 0.2170422226190567, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 72216557.0, "reward": 0.6272321939468384, "reward_std": 0.1533765196800232, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 493 }, { "clip_ratio/high_max": 0.0024790230236249045, "clip_ratio/high_mean": 0.0008695018859725678, "clip_ratio/low_mean": 0.0010824769087776076, "clip_ratio/low_min": 9.44787771004485e-05, "clip_ratio/region_mean": 0.0019519788038451225, "epoch": 1.1539807524059493, "grad_norm": 0.16782456636428833, "learning_rate": 1e-06, "loss": -0.0073, "step": 494 }, { "clip_ratio/high_max": 0.003365920187206939, "clip_ratio/high_mean": 0.0011844122873299057, "clip_ratio/low_mean": 0.0016755548749642912, "clip_ratio/low_min": 0.0001371486305288272, "clip_ratio/region_mean": 0.002859967207768932, "epoch": 1.1563137941090698, "grad_norm": 0.13180799782276154, "learning_rate": 1e-06, "loss": -0.0077, "step": 495 }, { "clip_ratio/high_max": 0.0041879126511048526, "clip_ratio/high_mean": 0.0015270216772478307, "clip_ratio/low_mean": 0.0021424044753075577, "clip_ratio/low_min": 0.00031002668765722774, "clip_ratio/region_mean": 0.0036694261361844838, "epoch": 1.1586468358121902, "grad_norm": 0.11174175143241882, "learning_rate": 1e-06, "loss": -0.008, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 803.5313110351562, "completions/mean_terminated_length": 600.677734375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.1609798775153106, "grad_norm": 0.18429267406463623, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 72813873.0, "reward": 0.5245535969734192, "reward_std": 0.13182073831558228, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 497 }, { "clip_ratio/high_max": 0.0021437152645376045, "clip_ratio/high_mean": 0.0007318118241528282, "clip_ratio/low_mean": 0.00070394719568867, "clip_ratio/low_min": 2.1215206288616173e-05, "clip_ratio/region_mean": 0.0014357590043800883, "epoch": 1.163312919218431, "grad_norm": 0.1357305943965912, "learning_rate": 1e-06, "loss": 0.0024, "step": 498 }, { "clip_ratio/high_max": 0.00272973244136665, "clip_ratio/high_mean": 0.0010095107263623504, "clip_ratio/low_mean": 0.0009321053948951885, "clip_ratio/low_min": 3.1822812161408365e-05, "clip_ratio/region_mean": 0.001941616130352486, "epoch": 1.1656459609215515, "grad_norm": 0.10415661334991455, "learning_rate": 1e-06, "loss": 0.0022, "step": 499 }, { "clip_ratio/high_max": 0.0035424725865595974, "clip_ratio/high_mean": 0.0013053411657892866, "clip_ratio/low_mean": 0.0013016939356020885, "clip_ratio/low_min": 4.5662101911148056e-05, "clip_ratio/region_mean": 0.002607035137771163, "epoch": 1.167979002624672, "grad_norm": 0.08420634269714355, "learning_rate": 1e-06, "loss": 0.002, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2551.0, "completions/mean_length": 645.1908569335938, "completions/mean_terminated_length": 558.328369140625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.1703120443277923, "grad_norm": 0.22355516254901886, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 73390332.0, "reward": 0.640625, "reward_std": 0.15570908784866333, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 501 }, { "clip_ratio/high_max": 0.00243851534469286, "clip_ratio/high_mean": 0.0010534417124290485, "clip_ratio/low_mean": 0.0007751993589408812, "clip_ratio/low_min": 3.675073185149813e-05, "clip_ratio/region_mean": 0.0018286410631844774, "epoch": 1.1726450860309128, "grad_norm": 0.15515461564064026, "learning_rate": 1e-06, "loss": -0.0039, "step": 502 }, { "clip_ratio/high_max": 0.003189172850397881, "clip_ratio/high_mean": 0.0013587860121333506, "clip_ratio/low_mean": 0.0011839194285130361, "clip_ratio/low_min": 6.090445276640821e-05, "clip_ratio/region_mean": 0.0025427054497413337, "epoch": 1.1749781277340332, "grad_norm": 0.11949562281370163, "learning_rate": 1e-06, "loss": -0.0042, "step": 503 }, { "clip_ratio/high_max": 0.004078530982951634, "clip_ratio/high_mean": 0.0017213686296599917, "clip_ratio/low_mean": 0.0016828909174364526, "clip_ratio/low_min": 0.00010462155478307977, "clip_ratio/region_mean": 0.0034042595216305926, "epoch": 1.1773111694371536, "grad_norm": 0.10191718488931656, "learning_rate": 1e-06, "loss": -0.0044, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 640.685302734375, "completions/mean_terminated_length": 549.6517944335938, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 1.179644211140274, "grad_norm": 0.27846455574035645, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 73952778.0, "reward": 0.6484375, "reward_std": 0.17957350611686707, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 505 }, { "clip_ratio/high_max": 0.0034870567469624802, "clip_ratio/high_mean": 0.0013359370823309291, "clip_ratio/low_mean": 0.001117403402531636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002453340588544961, "epoch": 1.1819772528433945, "grad_norm": 0.19863107800483704, "learning_rate": 1e-06, "loss": -0.0056, "step": 506 }, { "clip_ratio/high_max": 0.004620431820512749, "clip_ratio/high_mean": 0.0018403498179395683, "clip_ratio/low_mean": 0.0018532696885813493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003693619481055066, "epoch": 1.184310294546515, "grad_norm": 0.14556729793548584, "learning_rate": 1e-06, "loss": -0.006, "step": 507 }, { "clip_ratio/high_max": 0.00561136617034208, "clip_ratio/high_mean": 0.0022467659509857185, "clip_ratio/low_mean": 0.002562952518928796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004809718520846218, "epoch": 1.1866433362496354, "grad_norm": 0.1205429807305336, "learning_rate": 1e-06, "loss": -0.0063, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1805.0, "completions/mean_length": 668.8136596679688, "completions/mean_terminated_length": 554.1787719726562, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.188976377952756, "grad_norm": 0.22660039365291595, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 74525171.0, "reward": 0.5926339626312256, "reward_std": 0.1722804605960846, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 509 }, { "clip_ratio/high_max": 0.002612501077237539, "clip_ratio/high_mean": 0.0010833214182639495, "clip_ratio/low_mean": 0.0009355960737593705, "clip_ratio/low_min": 0.00010367288905399619, "clip_ratio/region_mean": 0.002018917497480288, "epoch": 1.1913094196558764, "grad_norm": 0.15740567445755005, "learning_rate": 1e-06, "loss": -0.0113, "step": 510 }, { "clip_ratio/high_max": 0.0033850307372631505, "clip_ratio/high_mean": 0.0014603216259274632, "clip_ratio/low_mean": 0.0014604823190893512, "clip_ratio/low_min": 0.00014281002222560346, "clip_ratio/region_mean": 0.002920803912275005, "epoch": 1.1936424613589969, "grad_norm": 0.12539918720722198, "learning_rate": 1e-06, "loss": -0.0116, "step": 511 }, { "clip_ratio/high_max": 0.004745133817777969, "clip_ratio/high_mean": 0.0018958086329803336, "clip_ratio/low_mean": 0.001954285296960734, "clip_ratio/low_min": 0.00023392707043967675, "clip_ratio/region_mean": 0.0038500939481309615, "epoch": 1.1959755030621173, "grad_norm": 0.10812674462795258, "learning_rate": 1e-06, "loss": -0.0119, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3781.0, "completions/mean_length": 651.6517944335938, "completions/mean_terminated_length": 564.951904296875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.1983085447652377, "grad_norm": 0.23825913667678833, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 75112027.0, "reward": 0.5970982313156128, "reward_std": 0.18576568365097046, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 513 }, { "clip_ratio/high_max": 0.0031745013329782523, "clip_ratio/high_mean": 0.0011811270142061403, "clip_ratio/low_mean": 0.0011903465046998463, "clip_ratio/low_min": 7.069818911986658e-05, "clip_ratio/region_mean": 0.002371473587118089, "epoch": 1.2006415864683582, "grad_norm": 0.1786256581544876, "learning_rate": 1e-06, "loss": 0.0021, "step": 514 }, { "clip_ratio/high_max": 0.0041359809547429904, "clip_ratio/high_mean": 0.0015824252186575904, "clip_ratio/low_mean": 0.0016670102795615094, "clip_ratio/low_min": 0.0001138832994911354, "clip_ratio/region_mean": 0.0032494354964001104, "epoch": 1.2029746281714786, "grad_norm": 0.1408441960811615, "learning_rate": 1e-06, "loss": 0.0017, "step": 515 }, { "clip_ratio/high_max": 0.00543760642176494, "clip_ratio/high_mean": 0.0020427647505130153, "clip_ratio/low_mean": 0.0023891310484032147, "clip_ratio/low_min": 8.227226317103487e-05, "clip_ratio/region_mean": 0.004431895664311014, "epoch": 1.205307669874599, "grad_norm": 0.12087538838386536, "learning_rate": 1e-06, "loss": 0.0014, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 754.435302734375, "completions/mean_terminated_length": 626.6581420898438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.2076407115777195, "grad_norm": 0.22044897079467773, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 75745977.0, "reward": 0.5703125, "reward_std": 0.19550618529319763, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 517 }, { "clip_ratio/high_max": 0.0029824026350979693, "clip_ratio/high_mean": 0.0010979945000144653, "clip_ratio/low_mean": 0.0011068997464462882, "clip_ratio/low_min": 4.95168869747431e-05, "clip_ratio/region_mean": 0.002204894284659531, "epoch": 1.20997375328084, "grad_norm": 0.1768798679113388, "learning_rate": 1e-06, "loss": -0.0047, "step": 518 }, { "clip_ratio/high_max": 0.0037713210404035635, "clip_ratio/high_mean": 0.001381472306093201, "clip_ratio/low_mean": 0.0017111638153437525, "clip_ratio/low_min": 8.322346093336819e-05, "clip_ratio/region_mean": 0.003092636150540784, "epoch": 1.2123067949839603, "grad_norm": 0.13626928627490997, "learning_rate": 1e-06, "loss": -0.0052, "step": 519 }, { "clip_ratio/high_max": 0.0048701268242439255, "clip_ratio/high_mean": 0.001822183512558695, "clip_ratio/low_mean": 0.0023592889119754545, "clip_ratio/low_min": 0.00014184681640472263, "clip_ratio/region_mean": 0.004181472409982234, "epoch": 1.2146398366870808, "grad_norm": 0.11198854446411133, "learning_rate": 1e-06, "loss": -0.0055, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 626.9877319335938, "completions/mean_terminated_length": 551.8323974609375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.2169728783902012, "grad_norm": 0.24142029881477356, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 76315086.0, "reward": 0.6662946939468384, "reward_std": 0.1324954479932785, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 521 }, { "clip_ratio/high_max": 0.0023131094203563407, "clip_ratio/high_mean": 0.0008910490832931828, "clip_ratio/low_mean": 0.0006676005004919716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015586495937895961, "epoch": 1.2193059200933216, "grad_norm": 0.14988140761852264, "learning_rate": 1e-06, "loss": 0.0065, "step": 522 }, { "clip_ratio/high_max": 0.0032906454580370337, "clip_ratio/high_mean": 0.001219542566104792, "clip_ratio/low_mean": 0.0011823125787486788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024018551921471953, "epoch": 1.221638961796442, "grad_norm": 0.10665763914585114, "learning_rate": 1e-06, "loss": 0.0062, "step": 523 }, { "clip_ratio/high_max": 0.00423041895555798, "clip_ratio/high_mean": 0.001591364332853118, "clip_ratio/low_mean": 0.001624816006369656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003216180302842986, "epoch": 1.2239720034995625, "grad_norm": 0.08333440124988556, "learning_rate": 1e-06, "loss": 0.006, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 656.6842041015625, "completions/mean_terminated_length": 533.4254150390625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 1.226305045202683, "grad_norm": 0.21423287689685822, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 76861995.0, "reward": 0.6395089626312256, "reward_std": 0.137939453125, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 525 }, { "clip_ratio/high_max": 0.002899590996094048, "clip_ratio/high_mean": 0.0009388163816765882, "clip_ratio/low_mean": 0.0007370505863946164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001675866988080088, "epoch": 1.2286380869058036, "grad_norm": 0.1612105518579483, "learning_rate": 1e-06, "loss": -0.0173, "step": 526 }, { "clip_ratio/high_max": 0.0042486454476602376, "clip_ratio/high_mean": 0.0012909534889331553, "clip_ratio/low_mean": 0.0012072297067788895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024981831884360872, "epoch": 1.2309711286089238, "grad_norm": 0.12303381413221359, "learning_rate": 1e-06, "loss": -0.0177, "step": 527 }, { "clip_ratio/high_max": 0.0055098685988923535, "clip_ratio/high_mean": 0.0016294645793095697, "clip_ratio/low_mean": 0.0017366952633892652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033661598863545805, "epoch": 1.2333041703120444, "grad_norm": 0.10457725077867508, "learning_rate": 1e-06, "loss": -0.0179, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 646.5256958007812, "completions/mean_terminated_length": 559.69677734375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 1.2356372120151649, "grad_norm": 0.18144333362579346, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 77442042.0, "reward": 0.5837053656578064, "reward_std": 0.10787961632013321, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 529 }, { "clip_ratio/high_max": 0.0020366440949146636, "clip_ratio/high_mean": 0.0006959709917282453, "clip_ratio/low_mean": 0.0005520284303202061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001247999411134515, "epoch": 1.2379702537182853, "grad_norm": 0.1310746669769287, "learning_rate": 1e-06, "loss": -0.0085, "step": 530 }, { "clip_ratio/high_max": 0.0029549129467341118, "clip_ratio/high_mean": 0.0010006844713643659, "clip_ratio/low_mean": 0.0008346334125235444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018353178893448785, "epoch": 1.2403032954214057, "grad_norm": 0.1024223044514656, "learning_rate": 1e-06, "loss": -0.0088, "step": 531 }, { "clip_ratio/high_max": 0.003425974580750335, "clip_ratio/high_mean": 0.0012426803332346026, "clip_ratio/low_mean": 0.0011277768462605309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023704572013230063, "epoch": 1.2426363371245261, "grad_norm": 0.08635548502206802, "learning_rate": 1e-06, "loss": -0.0089, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 629.6975708007812, "completions/mean_terminated_length": 550.5581665039062, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.2449693788276466, "grad_norm": 0.22805599868297577, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 78005595.0, "reward": 0.6551339626312256, "reward_std": 0.17435340583324432, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 533 }, { "clip_ratio/high_max": 0.002336297242436558, "clip_ratio/high_mean": 0.0009479099171585403, "clip_ratio/low_mean": 0.0008213288219849346, "clip_ratio/low_min": 3.2860145438462496e-05, "clip_ratio/region_mean": 0.0017692387627903372, "epoch": 1.247302420530767, "grad_norm": 0.1717034876346588, "learning_rate": 1e-06, "loss": -0.0088, "step": 534 }, { "clip_ratio/high_max": 0.002954067771497648, "clip_ratio/high_mean": 0.0012759139463014435, "clip_ratio/low_mean": 0.0014048425400687847, "clip_ratio/low_min": 5.9666921515599824e-05, "clip_ratio/region_mean": 0.0026807564790942706, "epoch": 1.2496354622338874, "grad_norm": 0.12467467784881592, "learning_rate": 1e-06, "loss": -0.0092, "step": 535 }, { "clip_ratio/high_max": 0.00400357368926052, "clip_ratio/high_mean": 0.0016883648168004584, "clip_ratio/low_mean": 0.0019507455217535608, "clip_ratio/low_min": 0.0001033831058521173, "clip_ratio/region_mean": 0.0036391103494679555, "epoch": 1.2519685039370079, "grad_norm": 0.10322923213243484, "learning_rate": 1e-06, "loss": -0.0094, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 664.4710083007812, "completions/mean_terminated_length": 524.9779052734375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.2543015456401283, "grad_norm": 0.25793924927711487, "learning_rate": 1e-06, "loss": -0.0241, "num_tokens": 78541617.0, "reward": 0.6551339626312256, "reward_std": 0.17412586510181427, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 537 }, { "clip_ratio/high_max": 0.0027835873843287118, "clip_ratio/high_mean": 0.0010560064702076488, "clip_ratio/low_mean": 0.0011051347028114833, "clip_ratio/low_min": 5.596041228272952e-05, "clip_ratio/region_mean": 0.00216114116483368, "epoch": 1.2566345873432487, "grad_norm": 0.18892988562583923, "learning_rate": 1e-06, "loss": -0.0243, "step": 538 }, { "clip_ratio/high_max": 0.004141990655625705, "clip_ratio/high_mean": 0.001529416909761494, "clip_ratio/low_mean": 0.0018209115114586893, "clip_ratio/low_min": 0.00017169447346532252, "clip_ratio/region_mean": 0.003350328406668268, "epoch": 1.2589676290463692, "grad_norm": 0.14527657628059387, "learning_rate": 1e-06, "loss": -0.0247, "step": 539 }, { "clip_ratio/high_max": 0.005610551961581223, "clip_ratio/high_mean": 0.0020720437132695224, "clip_ratio/low_mean": 0.002541237212426495, "clip_ratio/low_min": 0.00016435841826023534, "clip_ratio/region_mean": 0.004613280951161869, "epoch": 1.2613006707494896, "grad_norm": 0.11794157326221466, "learning_rate": 1e-06, "loss": -0.025, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2534.0, "completions/mean_length": 663.9732666015625, "completions/mean_terminated_length": 524.4598999023438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.26363371245261, "grad_norm": 0.2214980274438858, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 79084777.0, "reward": 0.613839328289032, "reward_std": 0.1288822591304779, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 541 }, { "clip_ratio/high_max": 0.0024396353765041567, "clip_ratio/high_mean": 0.0008580957146477886, "clip_ratio/low_mean": 0.000747677053368534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016057727189036086, "epoch": 1.2659667541557305, "grad_norm": 0.1597742736339569, "learning_rate": 1e-06, "loss": -0.0018, "step": 542 }, { "clip_ratio/high_max": 0.003179614635882899, "clip_ratio/high_mean": 0.0011454676277935505, "clip_ratio/low_mean": 0.0012472307862481102, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002392698370385915, "epoch": 1.268299795858851, "grad_norm": 0.13212434947490692, "learning_rate": 1e-06, "loss": -0.002, "step": 543 }, { "clip_ratio/high_max": 0.004267054202500731, "clip_ratio/high_mean": 0.0014962572786316741, "clip_ratio/low_mean": 0.0016898401954676956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003186097485013306, "epoch": 1.2706328375619713, "grad_norm": 0.10002051293849945, "learning_rate": 1e-06, "loss": -0.0023, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 706.0513916015625, "completions/mean_terminated_length": 572.341064453125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.272965879265092, "grad_norm": 0.21236126124858856, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 79664359.0, "reward": 0.609375, "reward_std": 0.1528957188129425, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 545 }, { "clip_ratio/high_max": 0.0026792281205416657, "clip_ratio/high_mean": 0.0009029014745465247, "clip_ratio/low_mean": 0.0007800454795869882, "clip_ratio/low_min": 4.869169879384572e-05, "clip_ratio/region_mean": 0.001682946938672103, "epoch": 1.2752989209682122, "grad_norm": 0.15671397745609283, "learning_rate": 1e-06, "loss": -0.0016, "step": 546 }, { "clip_ratio/high_max": 0.0035080669913440943, "clip_ratio/high_mean": 0.0012359166375972563, "clip_ratio/low_mean": 0.0013968840066809207, "clip_ratio/low_min": 0.00011499119318614248, "clip_ratio/region_mean": 0.0026328006788389757, "epoch": 1.2776319626713328, "grad_norm": 0.12957827746868134, "learning_rate": 1e-06, "loss": -0.0019, "step": 547 }, { "clip_ratio/high_max": 0.004534070365480147, "clip_ratio/high_mean": 0.0015764306699566077, "clip_ratio/low_mean": 0.0019492662650009152, "clip_ratio/low_min": 0.00013732873412664048, "clip_ratio/region_mean": 0.0035256969422334805, "epoch": 1.2799650043744533, "grad_norm": 0.10706808418035507, "learning_rate": 1e-06, "loss": -0.0021, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 683.7678833007812, "completions/mean_terminated_length": 573.6958618164062, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 1.2822980460775737, "grad_norm": 0.24017496407032013, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 80254639.0, "reward": 0.5948660969734192, "reward_std": 0.16671767830848694, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 549 }, { "clip_ratio/high_max": 0.002629236907523591, "clip_ratio/high_mean": 0.001001704418740701, "clip_ratio/low_mean": 0.0010980229199049063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020997272440581582, "epoch": 1.2846310877806941, "grad_norm": 0.168660506606102, "learning_rate": 1e-06, "loss": -0.0039, "step": 550 }, { "clip_ratio/high_max": 0.003279433265561238, "clip_ratio/high_mean": 0.0013049909612163901, "clip_ratio/low_mean": 0.001557305920869112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002862296867533587, "epoch": 1.2869641294838146, "grad_norm": 0.12763658165931702, "learning_rate": 1e-06, "loss": -0.0042, "step": 551 }, { "clip_ratio/high_max": 0.004213701075059362, "clip_ratio/high_mean": 0.0016255740774795413, "clip_ratio/low_mean": 0.002180467199650593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00380604132078588, "epoch": 1.289297171186935, "grad_norm": 0.10905377566814423, "learning_rate": 1e-06, "loss": -0.0045, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 708.5223388671875, "completions/mean_terminated_length": 591.1732177734375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.2916302128900554, "grad_norm": 0.23428334295749664, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 80862379.0, "reward": 0.5558035969734192, "reward_std": 0.18775734305381775, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 553 }, { "clip_ratio/high_max": 0.002697399308090098, "clip_ratio/high_mean": 0.0011326516359986272, "clip_ratio/low_mean": 0.0012473232363845455, "clip_ratio/low_min": 9.187501473206794e-05, "clip_ratio/region_mean": 0.0023799748305464163, "epoch": 1.2939632545931758, "grad_norm": 0.19335955381393433, "learning_rate": 1e-06, "loss": -0.0053, "step": 554 }, { "clip_ratio/high_max": 0.0033717429214448202, "clip_ratio/high_mean": 0.0015382270230475115, "clip_ratio/low_mean": 0.001684933806245681, "clip_ratio/low_min": 0.00012098671231797198, "clip_ratio/region_mean": 0.0032231608129222877, "epoch": 1.2962962962962963, "grad_norm": 0.14299902319908142, "learning_rate": 1e-06, "loss": -0.0057, "step": 555 }, { "clip_ratio/high_max": 0.004473239117942285, "clip_ratio/high_mean": 0.001996771607082337, "clip_ratio/low_mean": 0.0025586869196558837, "clip_ratio/low_min": 0.00021508698682737304, "clip_ratio/region_mean": 0.004555458566755988, "epoch": 1.2986293379994167, "grad_norm": 0.1171770989894867, "learning_rate": 1e-06, "loss": -0.006, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 708.5938110351562, "completions/mean_terminated_length": 591.2471313476562, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.3009623797025371, "grad_norm": 0.22910988330841064, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 81467455.0, "reward": 0.5457589626312256, "reward_std": 0.15837474167346954, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 557 }, { "clip_ratio/high_max": 0.0025557284971000627, "clip_ratio/high_mean": 0.0009926378897944232, "clip_ratio/low_mean": 0.0009378916020068573, "clip_ratio/low_min": 4.551747588266153e-05, "clip_ratio/region_mean": 0.0019305295281810686, "epoch": 1.3032954214056576, "grad_norm": 0.17126646637916565, "learning_rate": 1e-06, "loss": -0.0085, "step": 558 }, { "clip_ratio/high_max": 0.003438048828684259, "clip_ratio/high_mean": 0.0013028185931034386, "clip_ratio/low_mean": 0.0013529365678550676, "clip_ratio/low_min": 5.8287593674322125e-05, "clip_ratio/region_mean": 0.0026557551536825486, "epoch": 1.305628463108778, "grad_norm": 0.12889428436756134, "learning_rate": 1e-06, "loss": -0.0089, "step": 559 }, { "clip_ratio/high_max": 0.004370617898530327, "clip_ratio/high_mean": 0.001671064495894825, "clip_ratio/low_mean": 0.0019517128966981545, "clip_ratio/low_min": 9.460540422878694e-05, "clip_ratio/region_mean": 0.0036227773962309584, "epoch": 1.3079615048118984, "grad_norm": 0.10692799091339111, "learning_rate": 1e-06, "loss": -0.0091, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 760.0881958007812, "completions/mean_terminated_length": 608.2788696289062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.3102945465150189, "grad_norm": 0.23453675210475922, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 82082678.0, "reward": 0.5323660969734192, "reward_std": 0.1580038070678711, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 561 }, { "clip_ratio/high_max": 0.002480583560100058, "clip_ratio/high_mean": 0.0009224565164913656, "clip_ratio/low_mean": 0.0009516798118056613, "clip_ratio/low_min": 5.296743620419875e-05, "clip_ratio/region_mean": 0.0018741363273875322, "epoch": 1.3126275882181395, "grad_norm": 0.19108590483665466, "learning_rate": 1e-06, "loss": -0.0088, "step": 562 }, { "clip_ratio/high_max": 0.0035710921947611496, "clip_ratio/high_mean": 0.0013707173311559018, "clip_ratio/low_mean": 0.001363798673992278, "clip_ratio/low_min": 5.296743620419875e-05, "clip_ratio/region_mean": 0.0027345160488039255, "epoch": 1.3149606299212597, "grad_norm": 0.12561996281147003, "learning_rate": 1e-06, "loss": -0.0092, "step": 563 }, { "clip_ratio/high_max": 0.0044725585175910965, "clip_ratio/high_mean": 0.001745570782077266, "clip_ratio/low_mean": 0.001992074085137574, "clip_ratio/low_min": 0.00012939302541781217, "clip_ratio/region_mean": 0.0037376448017312214, "epoch": 1.3172936716243804, "grad_norm": 0.11073022335767746, "learning_rate": 1e-06, "loss": -0.0094, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 789.3047485351562, "completions/mean_terminated_length": 602.1332397460938, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.3196267133275008, "grad_norm": 0.27315497398376465, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 82686695.0, "reward": 0.520089328289032, "reward_std": 0.20020271837711334, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 565 }, { "clip_ratio/high_max": 0.002906338260800112, "clip_ratio/high_mean": 0.001107799473174964, "clip_ratio/low_mean": 0.001107951691665221, "clip_ratio/low_min": 2.9335989893297665e-05, "clip_ratio/region_mean": 0.0022157511702971533, "epoch": 1.3219597550306212, "grad_norm": 0.1810997873544693, "learning_rate": 1e-06, "loss": -0.0079, "step": 566 }, { "clip_ratio/high_max": 0.0036773524479940534, "clip_ratio/high_mean": 0.0014413066746783443, "clip_ratio/low_mean": 0.0016805729355837684, "clip_ratio/low_min": 5.740031883760821e-05, "clip_ratio/region_mean": 0.0031218796357279643, "epoch": 1.3242927967337417, "grad_norm": 0.13618353009223938, "learning_rate": 1e-06, "loss": -0.0083, "step": 567 }, { "clip_ratio/high_max": 0.004656884993892163, "clip_ratio/high_mean": 0.0018471341682015918, "clip_ratio/low_mean": 0.002336142017156817, "clip_ratio/low_min": 5.9722422520280816e-05, "clip_ratio/region_mean": 0.004183276207186282, "epoch": 1.326625838436862, "grad_norm": 0.11023623496294022, "learning_rate": 1e-06, "loss": -0.0086, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3492.0, "completions/mean_length": 793.8895263671875, "completions/mean_terminated_length": 594.5905151367188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.3289588801399825, "grad_norm": 0.22156088054180145, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 83280692.0, "reward": 0.5558035969734192, "reward_std": 0.1543956845998764, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 569 }, { "clip_ratio/high_max": 0.002889941883040592, "clip_ratio/high_mean": 0.0010416978893772466, "clip_ratio/low_mean": 0.0008341545326402411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018758523947326466, "epoch": 1.331291921843103, "grad_norm": 0.16367006301879883, "learning_rate": 1e-06, "loss": -0.008, "step": 570 }, { "clip_ratio/high_max": 0.0036530728539219126, "clip_ratio/high_mean": 0.0013171115460863803, "clip_ratio/low_mean": 0.0012606099371623714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025777215196285397, "epoch": 1.3336249635462234, "grad_norm": 0.12600426375865936, "learning_rate": 1e-06, "loss": -0.0083, "step": 571 }, { "clip_ratio/high_max": 0.004447303828783333, "clip_ratio/high_mean": 0.0015894851239863783, "clip_ratio/low_mean": 0.0017951038753381, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003384588926564902, "epoch": 1.3359580052493438, "grad_norm": 0.1044960543513298, "learning_rate": 1e-06, "loss": -0.0086, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 724.302490234375, "completions/mean_terminated_length": 611.5236206054688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 1.3382910469524643, "grad_norm": 0.23477703332901, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 83907099.0, "reward": 0.5546875, "reward_std": 0.16142338514328003, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 573 }, { "clip_ratio/high_max": 0.0022663504278170876, "clip_ratio/high_mean": 0.0009205204742102069, "clip_ratio/low_mean": 0.000829865815830999, "clip_ratio/low_min": 2.5955148885259405e-05, "clip_ratio/region_mean": 0.001750386301864637, "epoch": 1.3406240886555847, "grad_norm": 0.16626527905464172, "learning_rate": 1e-06, "loss": -0.0173, "step": 574 }, { "clip_ratio/high_max": 0.0032878115598578006, "clip_ratio/high_mean": 0.0013069258766336134, "clip_ratio/low_mean": 0.0013278172082209494, "clip_ratio/low_min": 5.2182672334311064e-05, "clip_ratio/region_mean": 0.002634743068483658, "epoch": 1.3429571303587051, "grad_norm": 0.12581634521484375, "learning_rate": 1e-06, "loss": -0.0176, "step": 575 }, { "clip_ratio/high_max": 0.004432856025232468, "clip_ratio/high_mean": 0.0017492416664026678, "clip_ratio/low_mean": 0.001776605025952449, "clip_ratio/low_min": 0.00011238103434152436, "clip_ratio/region_mean": 0.00352584665233735, "epoch": 1.3452901720618256, "grad_norm": 0.09932567179203033, "learning_rate": 1e-06, "loss": -0.0179, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 647.6596069335938, "completions/mean_terminated_length": 544.6057739257812, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.347623213764946, "grad_norm": 0.2533736824989319, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 84467562.0, "reward": 0.6116071939468384, "reward_std": 0.16841015219688416, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 577 }, { "clip_ratio/high_max": 0.0029357335224631242, "clip_ratio/high_mean": 0.001143776153185172, "clip_ratio/low_mean": 0.0009881281766865868, "clip_ratio/low_min": 1.3885803127777763e-05, "clip_ratio/region_mean": 0.002131904322595801, "epoch": 1.3499562554680664, "grad_norm": 0.20586083829402924, "learning_rate": 1e-06, "loss": -0.012, "step": 578 }, { "clip_ratio/high_max": 0.004051976029586513, "clip_ratio/high_mean": 0.0015299060978577472, "clip_ratio/low_mean": 0.0015539485393674113, "clip_ratio/low_min": 5.554321251111105e-05, "clip_ratio/region_mean": 0.0030838546226732433, "epoch": 1.352289297171187, "grad_norm": 0.13357383012771606, "learning_rate": 1e-06, "loss": -0.0124, "step": 579 }, { "clip_ratio/high_max": 0.00500328691850882, "clip_ratio/high_mean": 0.0018976723222294822, "clip_ratio/low_mean": 0.0021090631125844084, "clip_ratio/low_min": 2.7771606255555525e-05, "clip_ratio/region_mean": 0.004006735369330272, "epoch": 1.3546223388743073, "grad_norm": 0.11385779082775116, "learning_rate": 1e-06, "loss": -0.0127, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3832.0, "completions/mean_length": 739.9910888671875, "completions/mean_terminated_length": 615.6944580078125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.356955380577428, "grad_norm": 0.2521180808544159, "learning_rate": 1e-06, "loss": 0.0155, "num_tokens": 85102906.0, "reward": 0.5770089626312256, "reward_std": 0.1997506022453308, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 581 }, { "clip_ratio/high_max": 0.0022479596300399862, "clip_ratio/high_mean": 0.0010426281223772094, "clip_ratio/low_mean": 0.0012415924429660663, "clip_ratio/low_min": 7.433879272866761e-05, "clip_ratio/region_mean": 0.0022842205289634876, "epoch": 1.3592884222805481, "grad_norm": 0.18325793743133545, "learning_rate": 1e-06, "loss": 0.0154, "step": 582 }, { "clip_ratio/high_max": 0.003323168959468603, "clip_ratio/high_mean": 0.0014435686825891025, "clip_ratio/low_mean": 0.0018831871857400984, "clip_ratio/low_min": 9.18100013223011e-05, "clip_ratio/region_mean": 0.0033267558610532433, "epoch": 1.3616214639836688, "grad_norm": 0.13363049924373627, "learning_rate": 1e-06, "loss": 0.015, "step": 583 }, { "clip_ratio/high_max": 0.003699228836921975, "clip_ratio/high_mean": 0.0017330284354102332, "clip_ratio/low_mean": 0.002500845381291583, "clip_ratio/low_min": 0.00015332821021729615, "clip_ratio/region_mean": 0.0042338737403042614, "epoch": 1.3639545056867892, "grad_norm": 0.11606963723897934, "learning_rate": 1e-06, "loss": 0.0147, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 764.1317138671875, "completions/mean_terminated_length": 608.4368896484375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.3662875473899097, "grad_norm": 0.2340804487466812, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 85711336.0, "reward": 0.535714328289032, "reward_std": 0.17096522450447083, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 585 }, { "clip_ratio/high_max": 0.0026040291704703122, "clip_ratio/high_mean": 0.0010282762777933385, "clip_ratio/low_mean": 0.0009455679301026976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001973844220628962, "epoch": 1.36862058909303, "grad_norm": 0.1700149029493332, "learning_rate": 1e-06, "loss": 0.0043, "step": 586 }, { "clip_ratio/high_max": 0.0032864149325178005, "clip_ratio/high_mean": 0.0013598096084024291, "clip_ratio/low_mean": 0.0014411222364287823, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00280093181936536, "epoch": 1.3709536307961505, "grad_norm": 0.13006344437599182, "learning_rate": 1e-06, "loss": 0.004, "step": 587 }, { "clip_ratio/high_max": 0.004180324423941784, "clip_ratio/high_mean": 0.0017121468881668989, "clip_ratio/low_mean": 0.0019727757608052343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036849227180937305, "epoch": 1.373286672499271, "grad_norm": 0.1052432507276535, "learning_rate": 1e-06, "loss": 0.0037, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 742.4967041015625, "completions/mean_terminated_length": 602.117431640625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.3756197142023914, "grad_norm": 0.26939982175827026, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 86312605.0, "reward": 0.5870535969734192, "reward_std": 0.18182870745658875, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 589 }, { "clip_ratio/high_max": 0.0030560618397430517, "clip_ratio/high_mean": 0.0010927548937615938, "clip_ratio/low_mean": 0.0012002500734524801, "clip_ratio/low_min": 8.33747963042697e-05, "clip_ratio/region_mean": 0.002293004967214074, "epoch": 1.3779527559055118, "grad_norm": 0.20453085005283356, "learning_rate": 1e-06, "loss": 0.0058, "step": 590 }, { "clip_ratio/high_max": 0.003606366321037058, "clip_ratio/high_mean": 0.0013432383493636735, "clip_ratio/low_mean": 0.001642971888941247, "clip_ratio/low_min": 0.00011954377805523109, "clip_ratio/region_mean": 0.0029862102237530053, "epoch": 1.3802857976086322, "grad_norm": 0.1425563395023346, "learning_rate": 1e-06, "loss": 0.0054, "step": 591 }, { "clip_ratio/high_max": 0.004744893245515414, "clip_ratio/high_mean": 0.0017353657312924042, "clip_ratio/low_mean": 0.0023175275928224437, "clip_ratio/low_min": 0.00016481472266605124, "clip_ratio/region_mean": 0.004052893404150382, "epoch": 1.3826188393117527, "grad_norm": 0.11088960617780685, "learning_rate": 1e-06, "loss": 0.0051, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 697.2422485351562, "completions/mean_terminated_length": 571.3622436523438, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.384951881014873, "grad_norm": 0.2289675921201706, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 86890886.0, "reward": 0.6160714626312256, "reward_std": 0.15762588381767273, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 593 }, { "clip_ratio/high_max": 0.002659368998138234, "clip_ratio/high_mean": 0.0010071282376884483, "clip_ratio/low_mean": 0.0009476954546698835, "clip_ratio/low_min": 1.2028483070025686e-05, "clip_ratio/region_mean": 0.0019548236959963106, "epoch": 1.3872849227179935, "grad_norm": 0.1582915335893631, "learning_rate": 1e-06, "loss": -0.0142, "step": 594 }, { "clip_ratio/high_max": 0.0035935581399826333, "clip_ratio/high_mean": 0.0013516057915694546, "clip_ratio/low_mean": 0.0013463508148561232, "clip_ratio/low_min": 2.6743688067654148e-05, "clip_ratio/region_mean": 0.0026979565736837685, "epoch": 1.389617964421114, "grad_norm": 0.13142192363739014, "learning_rate": 1e-06, "loss": -0.0144, "step": 595 }, { "clip_ratio/high_max": 0.004420243014465086, "clip_ratio/high_mean": 0.0016669631622789893, "clip_ratio/low_mean": 0.001836692481447244, "clip_ratio/low_min": 2.6743688067654148e-05, "clip_ratio/region_mean": 0.003503655767417513, "epoch": 1.3919510061242344, "grad_norm": 0.10620908439159393, "learning_rate": 1e-06, "loss": -0.0147, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2697.0, "completions/mean_length": 706.9766235351562, "completions/mean_terminated_length": 569.2113647460938, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 1.3942840478273548, "grad_norm": 0.24431443214416504, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 87477233.0, "reward": 0.6495535969734192, "reward_std": 0.1303144097328186, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 597 }, { "clip_ratio/high_max": 0.0025874205748550594, "clip_ratio/high_mean": 0.0009072033826669212, "clip_ratio/low_mean": 0.0009314012222603196, "clip_ratio/low_min": 3.0720078939339146e-05, "clip_ratio/region_mean": 0.0018386046249361243, "epoch": 1.3966170895304755, "grad_norm": 0.15889666974544525, "learning_rate": 1e-06, "loss": -0.0037, "step": 598 }, { "clip_ratio/high_max": 0.003629087055742275, "clip_ratio/high_mean": 0.001268865631573135, "clip_ratio/low_mean": 0.001367871696857037, "clip_ratio/low_min": 3.0720078939339146e-05, "clip_ratio/region_mean": 0.002636737310240278, "epoch": 1.3989501312335957, "grad_norm": 0.12015816569328308, "learning_rate": 1e-06, "loss": -0.004, "step": 599 }, { "clip_ratio/high_max": 0.004353770505986176, "clip_ratio/high_mean": 0.0015112133187358268, "clip_ratio/low_mean": 0.0017712689586915076, "clip_ratio/low_min": 6.144015787867829e-05, "clip_ratio/region_mean": 0.0032824822701513767, "epoch": 1.4012831729367163, "grad_norm": 0.09728114306926727, "learning_rate": 1e-06, "loss": -0.0041, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 761.6160888671875, "completions/mean_terminated_length": 589.4178466796875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.4036162146398368, "grad_norm": 0.2660142481327057, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 88076617.0, "reward": 0.5736607313156128, "reward_std": 0.17167635262012482, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 601 }, { "clip_ratio/high_max": 0.0028252918418729678, "clip_ratio/high_mean": 0.001011983236821834, "clip_ratio/low_mean": 0.001190659917483572, "clip_ratio/low_min": 6.948993359401356e-05, "clip_ratio/region_mean": 0.0022026431470294483, "epoch": 1.4059492563429572, "grad_norm": 0.18407492339611053, "learning_rate": 1e-06, "loss": -0.0147, "step": 602 }, { "clip_ratio/high_max": 0.004073882766533643, "clip_ratio/high_mean": 0.0014376516082847957, "clip_ratio/low_mean": 0.0017701081160339527, "clip_ratio/low_min": 0.00011685118442983367, "clip_ratio/region_mean": 0.0032077597570605576, "epoch": 1.4082822980460776, "grad_norm": 0.1498991996049881, "learning_rate": 1e-06, "loss": -0.015, "step": 603 }, { "clip_ratio/high_max": 0.005246884495136328, "clip_ratio/high_mean": 0.0018543464721005876, "clip_ratio/low_mean": 0.0024905688333092257, "clip_ratio/low_min": 0.000183977244887501, "clip_ratio/region_mean": 0.00434491531632375, "epoch": 1.410615339749198, "grad_norm": 0.11694962531328201, "learning_rate": 1e-06, "loss": -0.0154, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2045.0, "completions/mean_length": 657.5535888671875, "completions/mean_terminated_length": 558.861083984375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.4129483814523185, "grad_norm": 0.2518817186355591, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 88647865.0, "reward": 0.6640625, "reward_std": 0.16856423020362854, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 605 }, { "clip_ratio/high_max": 0.002815589527017437, "clip_ratio/high_mean": 0.0010664238943718374, "clip_ratio/low_mean": 0.00120110178613686, "clip_ratio/low_min": 7.438800457748584e-05, "clip_ratio/region_mean": 0.002267525684146676, "epoch": 1.415281423155439, "grad_norm": 0.17306607961654663, "learning_rate": 1e-06, "loss": 0.0004, "step": 606 }, { "clip_ratio/high_max": 0.003523818413668778, "clip_ratio/high_mean": 0.0013611506365123205, "clip_ratio/low_mean": 0.0019441491749603301, "clip_ratio/low_min": 0.0002188601138186641, "clip_ratio/region_mean": 0.0033052997750928625, "epoch": 1.4176144648585594, "grad_norm": 0.15049561858177185, "learning_rate": 1e-06, "loss": 0.0001, "step": 607 }, { "clip_ratio/high_max": 0.004726093102362938, "clip_ratio/high_mean": 0.0016930464989854954, "clip_ratio/low_mean": 0.0025370415241923183, "clip_ratio/low_min": 0.00026239894941681996, "clip_ratio/region_mean": 0.004230087986798026, "epoch": 1.4199475065616798, "grad_norm": 0.11360776424407959, "learning_rate": 1e-06, "loss": -0.0002, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 711.4252319335938, "completions/mean_terminated_length": 565.6402587890625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 1.4222805482648002, "grad_norm": 0.27330055832862854, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 89222974.0, "reward": 0.5714285969734192, "reward_std": 0.1948240101337433, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 609 }, { "clip_ratio/high_max": 0.0032176113018067554, "clip_ratio/high_mean": 0.0011535034682310652, "clip_ratio/low_mean": 0.0011342341531417333, "clip_ratio/low_min": 1.896813409985043e-05, "clip_ratio/region_mean": 0.0022877376250107773, "epoch": 1.4246135899679206, "grad_norm": 0.2532554566860199, "learning_rate": 1e-06, "loss": -0.014, "step": 610 }, { "clip_ratio/high_max": 0.004885990674665663, "clip_ratio/high_mean": 0.001690005032287445, "clip_ratio/low_mean": 0.0016284515004372224, "clip_ratio/low_min": 3.793626819970086e-05, "clip_ratio/region_mean": 0.003318456481792964, "epoch": 1.426946631671041, "grad_norm": 0.15884840488433838, "learning_rate": 1e-06, "loss": -0.0145, "step": 611 }, { "clip_ratio/high_max": 0.006012012032442726, "clip_ratio/high_mean": 0.0020612109583453275, "clip_ratio/low_mean": 0.0022640417664661072, "clip_ratio/low_min": 5.690440229955129e-05, "clip_ratio/region_mean": 0.004325252681155689, "epoch": 1.4292796733741615, "grad_norm": 0.12222730368375778, "learning_rate": 1e-06, "loss": -0.0148, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 686.4933471679688, "completions/mean_terminated_length": 560.2152709960938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.431612715077282, "grad_norm": 0.237994983792305, "learning_rate": 1e-06, "loss": -0.0251, "num_tokens": 89789528.0, "reward": 0.6428571939468384, "reward_std": 0.14537762105464935, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 613 }, { "clip_ratio/high_max": 0.002468155031237984, "clip_ratio/high_mean": 0.0010186191411776235, "clip_ratio/low_mean": 0.0007648663204236072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001783485491614556, "epoch": 1.4339457567804024, "grad_norm": 0.1650596708059311, "learning_rate": 1e-06, "loss": -0.0252, "step": 614 }, { "clip_ratio/high_max": 0.003713901460287161, "clip_ratio/high_mean": 0.001392907883200678, "clip_ratio/low_mean": 0.0012619663757504895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026548742898739874, "epoch": 1.436278798483523, "grad_norm": 0.12073415517807007, "learning_rate": 1e-06, "loss": -0.0255, "step": 615 }, { "clip_ratio/high_max": 0.004765011326526292, "clip_ratio/high_mean": 0.0017390751563652884, "clip_ratio/low_mean": 0.0018097065694746561, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003548781663994305, "epoch": 1.4386118401866432, "grad_norm": 0.10291443020105362, "learning_rate": 1e-06, "loss": -0.0257, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 668.3928833007812, "completions/mean_terminated_length": 553.743896484375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 1.4409448818897639, "grad_norm": 0.215545654296875, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 90353952.0, "reward": 0.6741071939468384, "reward_std": 0.14902471005916595, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 617 }, { "clip_ratio/high_max": 0.0022404942428693175, "clip_ratio/high_mean": 0.000997783357888693, "clip_ratio/low_mean": 0.0009028914955706568, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019006748480023816, "epoch": 1.443277923592884, "grad_norm": 0.1691480129957199, "learning_rate": 1e-06, "loss": -0.0089, "step": 618 }, { "clip_ratio/high_max": 0.003299728239653632, "clip_ratio/high_mean": 0.0013288722693687305, "clip_ratio/low_mean": 0.0013514717902580742, "clip_ratio/low_min": 4.224876101943664e-05, "clip_ratio/region_mean": 0.0026803440632647835, "epoch": 1.4456109652960047, "grad_norm": 0.13145700097084045, "learning_rate": 1e-06, "loss": -0.0092, "step": 619 }, { "clip_ratio/high_max": 0.00435585695959162, "clip_ratio/high_mean": 0.0017003100656438619, "clip_ratio/low_mean": 0.0019414539492572658, "clip_ratio/low_min": 4.584127964335494e-05, "clip_ratio/region_mean": 0.003641764065832831, "epoch": 1.4479440069991252, "grad_norm": 0.10277307033538818, "learning_rate": 1e-06, "loss": -0.0094, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 609.125, "completions/mean_terminated_length": 537.64013671875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 1.4502770487022456, "grad_norm": 0.2550145983695984, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 90915304.0, "reward": 0.59375, "reward_std": 0.13820087909698486, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 621 }, { "clip_ratio/high_max": 0.0024581433899584226, "clip_ratio/high_mean": 0.0008031039324123412, "clip_ratio/low_mean": 0.0012449456844478846, "clip_ratio/low_min": 6.858757842564955e-05, "clip_ratio/region_mean": 0.002048049616860226, "epoch": 1.452610090405366, "grad_norm": 0.1727628856897354, "learning_rate": 1e-06, "loss": -0.0114, "step": 622 }, { "clip_ratio/high_max": 0.0031743153049319517, "clip_ratio/high_mean": 0.0011290204001852544, "clip_ratio/low_mean": 0.001713043217023369, "clip_ratio/low_min": 0.00014949930846341886, "clip_ratio/region_mean": 0.002842063709977083, "epoch": 1.4549431321084865, "grad_norm": 0.13041099905967712, "learning_rate": 1e-06, "loss": -0.0118, "step": 623 }, { "clip_ratio/high_max": 0.0039047517493600026, "clip_ratio/high_mean": 0.0014085070515648113, "clip_ratio/low_mean": 0.0023611213473486714, "clip_ratio/low_min": 0.00018652138533070683, "clip_ratio/region_mean": 0.003769628456211649, "epoch": 1.457276173811607, "grad_norm": 0.10398651659488678, "learning_rate": 1e-06, "loss": -0.012, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 733.630615234375, "completions/mean_terminated_length": 613.1294555664062, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 1.4596092155147273, "grad_norm": 0.23882876336574554, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 91537165.0, "reward": 0.5558035969734192, "reward_std": 0.1770561784505844, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 625 }, { "clip_ratio/high_max": 0.0026795370868057944, "clip_ratio/high_mean": 0.0010660166444722563, "clip_ratio/low_mean": 0.0009282323753723176, "clip_ratio/low_min": 8.009831890376518e-05, "clip_ratio/region_mean": 0.001994249047129415, "epoch": 1.4619422572178478, "grad_norm": 0.17713265120983124, "learning_rate": 1e-06, "loss": -0.0213, "step": 626 }, { "clip_ratio/high_max": 0.003393834507733118, "clip_ratio/high_mean": 0.0013139545590092894, "clip_ratio/low_mean": 0.0013925045677751768, "clip_ratio/low_min": 0.00014765804098715307, "clip_ratio/region_mean": 0.0027064591631642543, "epoch": 1.4642752989209682, "grad_norm": 0.12807483971118927, "learning_rate": 1e-06, "loss": -0.0216, "step": 627 }, { "clip_ratio/high_max": 0.0043478118313942105, "clip_ratio/high_mean": 0.0017073181734303944, "clip_ratio/low_mean": 0.0019296742684673518, "clip_ratio/low_min": 0.0002691058034542948, "clip_ratio/region_mean": 0.0036369925073813647, "epoch": 1.4666083406240886, "grad_norm": 0.1054125726222992, "learning_rate": 1e-06, "loss": -0.0219, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 698.7745971679688, "completions/mean_terminated_length": 597.248291015625, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 1.468941382327209, "grad_norm": 0.211578831076622, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 92147627.0, "reward": 0.5680803656578064, "reward_std": 0.14053022861480713, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 629 }, { "clip_ratio/high_max": 0.002169916348066181, "clip_ratio/high_mean": 0.0007913657991593936, "clip_ratio/low_mean": 0.0007795464534865459, "clip_ratio/low_min": 1.1172684935445432e-05, "clip_ratio/region_mean": 0.0015709122671978548, "epoch": 1.4712744240303295, "grad_norm": 0.14407220482826233, "learning_rate": 1e-06, "loss": -0.0017, "step": 630 }, { "clip_ratio/high_max": 0.002954186587885488, "clip_ratio/high_mean": 0.0010976574158121366, "clip_ratio/low_mean": 0.0011841713130706921, "clip_ratio/low_min": 1.3213531019573566e-05, "clip_ratio/region_mean": 0.0022818286597612314, "epoch": 1.47360746573345, "grad_norm": 0.11402598768472672, "learning_rate": 1e-06, "loss": -0.002, "step": 631 }, { "clip_ratio/high_max": 0.003658107088995166, "clip_ratio/high_mean": 0.0014325862175610382, "clip_ratio/low_mean": 0.0016285706878989004, "clip_ratio/low_min": 3.9640592149225995e-05, "clip_ratio/region_mean": 0.0030611569018219598, "epoch": 1.4759405074365703, "grad_norm": 0.09539580345153809, "learning_rate": 1e-06, "loss": -0.0022, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 735.1217041015625, "completions/mean_terminated_length": 622.7047119140625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.4782735491396908, "grad_norm": 0.255553275346756, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 92766056.0, "reward": 0.598214328289032, "reward_std": 0.17972365021705627, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 633 }, { "clip_ratio/high_max": 0.003113851969828829, "clip_ratio/high_mean": 0.0011458525186753832, "clip_ratio/low_mean": 0.001056854580383515, "clip_ratio/low_min": 5.2830973800155334e-05, "clip_ratio/region_mean": 0.0022027070517651737, "epoch": 1.4806065908428114, "grad_norm": 0.18853828310966492, "learning_rate": 1e-06, "loss": -0.0169, "step": 634 }, { "clip_ratio/high_max": 0.004100943406228907, "clip_ratio/high_mean": 0.0015470044454559684, "clip_ratio/low_mean": 0.0015419793053297326, "clip_ratio/low_min": 0.00011606154475884978, "clip_ratio/region_mean": 0.0030889837944414467, "epoch": 1.4829396325459316, "grad_norm": 0.13696949183940887, "learning_rate": 1e-06, "loss": -0.0173, "step": 635 }, { "clip_ratio/high_max": 0.004982385769835673, "clip_ratio/high_mean": 0.0018905659890151583, "clip_ratio/low_mean": 0.0021749539591837674, "clip_ratio/low_min": 0.00010716425094869919, "clip_ratio/region_mean": 0.004065519955474883, "epoch": 1.4852726742490523, "grad_norm": 0.11640757322311401, "learning_rate": 1e-06, "loss": -0.0175, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 699.6261596679688, "completions/mean_terminated_length": 602.1411743164062, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 1.4876057159521727, "grad_norm": 0.23275621235370636, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 93380625.0, "reward": 0.6082589626312256, "reward_std": 0.15488353371620178, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 637 }, { "clip_ratio/high_max": 0.00261154140389408, "clip_ratio/high_mean": 0.000951551668549655, "clip_ratio/low_mean": 0.0008668691243656212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018184208311140537, "epoch": 1.4899387576552932, "grad_norm": 0.15933369100093842, "learning_rate": 1e-06, "loss": -0.0056, "step": 638 }, { "clip_ratio/high_max": 0.0035992080738651566, "clip_ratio/high_mean": 0.0012352936064417008, "clip_ratio/low_mean": 0.0014215341543604154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026568277753540315, "epoch": 1.4922717993584136, "grad_norm": 0.12040871381759644, "learning_rate": 1e-06, "loss": -0.0059, "step": 639 }, { "clip_ratio/high_max": 0.004385348096548114, "clip_ratio/high_mean": 0.0015442943295056466, "clip_ratio/low_mean": 0.0018983990648848703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034426932979840785, "epoch": 1.494604841061534, "grad_norm": 0.09643445909023285, "learning_rate": 1e-06, "loss": -0.0061, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 675.2489013671875, "completions/mean_terminated_length": 585.1260375976562, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.4969378827646544, "grad_norm": 0.268905907869339, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 93973832.0, "reward": 0.637276828289032, "reward_std": 0.1760031133890152, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 641 }, { "clip_ratio/high_max": 0.00284417415241478, "clip_ratio/high_mean": 0.0010783430934679927, "clip_ratio/low_mean": 0.0011786076247517485, "clip_ratio/low_min": 0.00012883207637059968, "clip_ratio/region_mean": 0.002256950676382985, "epoch": 1.4992709244677749, "grad_norm": 0.17745041847229004, "learning_rate": 1e-06, "loss": -0.0014, "step": 642 }, { "clip_ratio/high_max": 0.0037779470876557752, "clip_ratio/high_mean": 0.0014584932341676904, "clip_ratio/low_mean": 0.0017879890110634733, "clip_ratio/low_min": 0.00023553237406304106, "clip_ratio/region_mean": 0.003246482214308344, "epoch": 1.5016039661708953, "grad_norm": 0.13850215077400208, "learning_rate": 1e-06, "loss": -0.0018, "step": 643 }, { "clip_ratio/high_max": 0.0045592223686981015, "clip_ratio/high_mean": 0.0018511574326112168, "clip_ratio/low_mean": 0.002360485479584895, "clip_ratio/low_min": 0.0002514655552658951, "clip_ratio/region_mean": 0.004211642881273292, "epoch": 1.5039370078740157, "grad_norm": 0.11378243565559387, "learning_rate": 1e-06, "loss": -0.0021, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 755.5692138671875, "completions/mean_terminated_length": 607.6246948242188, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.5062700495771362, "grad_norm": 0.25229859352111816, "learning_rate": 1e-06, "loss": -0.0191, "num_tokens": 94590614.0, "reward": 0.5647321939468384, "reward_std": 0.17829401791095734, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 645 }, { "clip_ratio/high_max": 0.0022578874668397475, "clip_ratio/high_mean": 0.0008689554506418062, "clip_ratio/low_mean": 0.0011077263370680157, "clip_ratio/low_min": 6.04938504693564e-05, "clip_ratio/region_mean": 0.0019766818077187054, "epoch": 1.5086030912802566, "grad_norm": 0.17806486785411835, "learning_rate": 1e-06, "loss": -0.0192, "step": 646 }, { "clip_ratio/high_max": 0.003191494368365966, "clip_ratio/high_mean": 0.0012481854737416143, "clip_ratio/low_mean": 0.0015191793936537579, "clip_ratio/low_min": 0.00015425561105075758, "clip_ratio/region_mean": 0.0027673648146446794, "epoch": 1.510936132983377, "grad_norm": 0.1491885632276535, "learning_rate": 1e-06, "loss": -0.0195, "step": 647 }, { "clip_ratio/high_max": 0.0042654540375224315, "clip_ratio/high_mean": 0.0016534514834347647, "clip_ratio/low_mean": 0.002113633629051037, "clip_ratio/low_min": 0.00022989338685874827, "clip_ratio/region_mean": 0.003767085145227611, "epoch": 1.5132691746864975, "grad_norm": 1.3036670684814453, "learning_rate": 1e-06, "loss": -0.0196, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 697.7824096679688, "completions/mean_terminated_length": 596.2264404296875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.5156022163896181, "grad_norm": 0.24837520718574524, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 95210947.0, "reward": 0.53125, "reward_std": 0.17438729107379913, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 649 }, { "clip_ratio/high_max": 0.0029166430249460973, "clip_ratio/high_mean": 0.0011180057099409169, "clip_ratio/low_mean": 0.0010227817365375813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021407874082797207, "epoch": 1.5179352580927383, "grad_norm": 0.17126323282718658, "learning_rate": 1e-06, "loss": 0.0026, "step": 650 }, { "clip_ratio/high_max": 0.003571574248780962, "clip_ratio/high_mean": 0.0014517231065838132, "clip_ratio/low_mean": 0.0015848874281800818, "clip_ratio/low_min": 4.088774585397914e-05, "clip_ratio/region_mean": 0.0030366105420398526, "epoch": 1.520268299795859, "grad_norm": 0.1440151482820511, "learning_rate": 1e-06, "loss": 0.0023, "step": 651 }, { "clip_ratio/high_max": 0.004772054577188101, "clip_ratio/high_mean": 0.0018286405902472325, "clip_ratio/low_mean": 0.0021784825876238756, "clip_ratio/low_min": 0.0001548134459881112, "clip_ratio/region_mean": 0.004007123221526854, "epoch": 1.5226013414989792, "grad_norm": 0.11769486963748932, "learning_rate": 1e-06, "loss": 0.0021, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2669.0, "completions/mean_length": 719.2098388671875, "completions/mean_terminated_length": 540.6486206054688, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 1.5249343832020998, "grad_norm": 0.24668866395950317, "learning_rate": 1e-06, "loss": -0.0085, "num_tokens": 95764527.0, "reward": 0.5837053656578064, "reward_std": 0.13786135613918304, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 653 }, { "clip_ratio/high_max": 0.0030278684716904536, "clip_ratio/high_mean": 0.0009351862317998894, "clip_ratio/low_mean": 0.0010637700506777037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019989562351838686, "epoch": 1.52726742490522, "grad_norm": 0.17412272095680237, "learning_rate": 1e-06, "loss": -0.0086, "step": 654 }, { "clip_ratio/high_max": 0.003672920254757628, "clip_ratio/high_mean": 0.001250979339602054, "clip_ratio/low_mean": 0.001453944671084173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027049241034546867, "epoch": 1.5296004666083407, "grad_norm": 0.14083853363990784, "learning_rate": 1e-06, "loss": -0.0089, "step": 655 }, { "clip_ratio/high_max": 0.00441933218826307, "clip_ratio/high_mean": 0.0015361731493612751, "clip_ratio/low_mean": 0.0020172359072603285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003553409085725434, "epoch": 1.531933508311461, "grad_norm": 0.2874014675617218, "learning_rate": 1e-06, "loss": -0.0091, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 723.9364013671875, "completions/mean_terminated_length": 615.16015625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.5342665500145816, "grad_norm": 0.24704347550868988, "learning_rate": 1e-06, "loss": -0.0148, "num_tokens": 96390374.0, "reward": 0.5758928656578064, "reward_std": 0.18159864842891693, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 657 }, { "clip_ratio/high_max": 0.0026748333730211016, "clip_ratio/high_mean": 0.0011489792341308203, "clip_ratio/low_mean": 0.0009813399010454305, "clip_ratio/low_min": 8.158203854691237e-06, "clip_ratio/region_mean": 0.0021303191533661447, "epoch": 1.536599591717702, "grad_norm": 0.1916906088590622, "learning_rate": 1e-06, "loss": -0.0149, "step": 658 }, { "clip_ratio/high_max": 0.003937670095183421, "clip_ratio/high_mean": 0.0015830356715014204, "clip_ratio/low_mean": 0.001576752183609642, "clip_ratio/low_min": 3.263281541876495e-05, "clip_ratio/region_mean": 0.003159787884214893, "epoch": 1.5389326334208224, "grad_norm": 0.14322027564048767, "learning_rate": 1e-06, "loss": -0.0153, "step": 659 }, { "clip_ratio/high_max": 0.004746602819068357, "clip_ratio/high_mean": 0.0019692013411258813, "clip_ratio/low_mean": 0.002144063910236582, "clip_ratio/low_min": 3.263281541876495e-05, "clip_ratio/region_mean": 0.004113265182240866, "epoch": 1.5412656751239429, "grad_norm": 0.1273566037416458, "learning_rate": 1e-06, "loss": -0.0156, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 614.4364013671875, "completions/mean_terminated_length": 543.0603637695312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 1.5435987168270633, "grad_norm": 0.2377653419971466, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 96960741.0, "reward": 0.5714285969734192, "reward_std": 0.13876360654830933, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 661 }, { "clip_ratio/high_max": 0.0022007717452652287, "clip_ratio/high_mean": 0.0008474415317323292, "clip_ratio/low_mean": 0.0009551286493660882, "clip_ratio/low_min": 3.9316419133683667e-05, "clip_ratio/region_mean": 0.0018025701501755975, "epoch": 1.5459317585301837, "grad_norm": 0.1649816632270813, "learning_rate": 1e-06, "loss": -0.0085, "step": 662 }, { "clip_ratio/high_max": 0.0032669806241756305, "clip_ratio/high_mean": 0.0012382956283545354, "clip_ratio/low_mean": 0.0014223038597265258, "clip_ratio/low_min": 5.8506902860244736e-05, "clip_ratio/region_mean": 0.002660599464434199, "epoch": 1.5482648002333042, "grad_norm": 0.12957312166690826, "learning_rate": 1e-06, "loss": -0.0088, "step": 663 }, { "clip_ratio/high_max": 0.004052964053698815, "clip_ratio/high_mean": 0.0015383152185677318, "clip_ratio/low_mean": 0.00196084774506744, "clip_ratio/low_min": 6.55273615848273e-05, "clip_ratio/region_mean": 0.003499162965454161, "epoch": 1.5505978419364246, "grad_norm": 0.10375001281499863, "learning_rate": 1e-06, "loss": -0.009, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 696.6517944335938, "completions/mean_terminated_length": 574.825439453125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.552930883639545, "grad_norm": 0.2455303966999054, "learning_rate": 1e-06, "loss": -0.0205, "num_tokens": 97540869.0, "reward": 0.6473214626312256, "reward_std": 0.18103593587875366, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 665 }, { "clip_ratio/high_max": 0.002507605546270497, "clip_ratio/high_mean": 0.001140863820182858, "clip_ratio/low_mean": 0.0009105432354772347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020514069983619265, "epoch": 1.5552639253426657, "grad_norm": 0.23204277455806732, "learning_rate": 1e-06, "loss": -0.0205, "step": 666 }, { "clip_ratio/high_max": 0.0036795603373320773, "clip_ratio/high_mean": 0.0015336996875703335, "clip_ratio/low_mean": 0.0016542968896828825, "clip_ratio/low_min": 7.553920659120195e-05, "clip_ratio/region_mean": 0.003187996582710184, "epoch": 1.5575969670457859, "grad_norm": 0.13570688664913177, "learning_rate": 1e-06, "loss": -0.0209, "step": 667 }, { "clip_ratio/high_max": 0.004394473522552289, "clip_ratio/high_mean": 0.001863567747932393, "clip_ratio/low_mean": 0.0022248705790843815, "clip_ratio/low_min": 0.00011330880806781352, "clip_ratio/region_mean": 0.004088438377948478, "epoch": 1.5599300087489065, "grad_norm": 0.11078991740942001, "learning_rate": 1e-06, "loss": -0.0212, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 799.732177734375, "completions/mean_terminated_length": 621.3458862304688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.5622630504520267, "grad_norm": 0.22503575682640076, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 98153357.0, "reward": 0.5714285969734192, "reward_std": 0.14045538008213043, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 669 }, { "clip_ratio/high_max": 0.002499974027159624, "clip_ratio/high_mean": 0.0008402636503888061, "clip_ratio/low_mean": 0.0007986827404238284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016389463999075815, "epoch": 1.5645960921551474, "grad_norm": 0.1848660707473755, "learning_rate": 1e-06, "loss": 0.0007, "step": 670 }, { "clip_ratio/high_max": 0.003215509670553729, "clip_ratio/high_mean": 0.0011113211876363494, "clip_ratio/low_mean": 0.0012933098914800212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024046311009442434, "epoch": 1.5669291338582676, "grad_norm": 0.11702371388673782, "learning_rate": 1e-06, "loss": 0.0004, "step": 671 }, { "clip_ratio/high_max": 0.004009252130344976, "clip_ratio/high_mean": 0.0014165052016323898, "clip_ratio/low_mean": 0.0017802458023652434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031967509785317816, "epoch": 1.5692621755613883, "grad_norm": 0.09714484214782715, "learning_rate": 1e-06, "loss": 0.0003, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 716.9844360351562, "completions/mean_terminated_length": 575.5372314453125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 1.5715952172645085, "grad_norm": 0.24592353403568268, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 98736119.0, "reward": 0.6383928656578064, "reward_std": 0.16529367864131927, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 673 }, { "clip_ratio/high_max": 0.002768927668512333, "clip_ratio/high_mean": 0.0011065197086281842, "clip_ratio/low_mean": 0.0008939317121985368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002000451411731774, "epoch": 1.5739282589676291, "grad_norm": 0.18526984751224518, "learning_rate": 1e-06, "loss": -0.0179, "step": 674 }, { "clip_ratio/high_max": 0.003912720050720964, "clip_ratio/high_mean": 0.0014485209831036627, "clip_ratio/low_mean": 0.0014226161474653054, "clip_ratio/low_min": 1.688276643108111e-05, "clip_ratio/region_mean": 0.0028711371778626926, "epoch": 1.5762613006707495, "grad_norm": 0.13849732279777527, "learning_rate": 1e-06, "loss": -0.0182, "step": 675 }, { "clip_ratio/high_max": 0.004992340531316586, "clip_ratio/high_mean": 0.0018439176928950474, "clip_ratio/low_mean": 0.002002959903620649, "clip_ratio/low_min": 6.753106572432443e-05, "clip_ratio/region_mean": 0.003846877531032078, "epoch": 1.57859434237387, "grad_norm": 0.10880594700574875, "learning_rate": 1e-06, "loss": -0.0185, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 698.9152221679688, "completions/mean_terminated_length": 605.4174194335938, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 1.5809273840769904, "grad_norm": 0.2611704170703888, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 99353211.0, "reward": 0.6004464626312256, "reward_std": 0.1900157481431961, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 677 }, { "clip_ratio/high_max": 0.0027919506901525892, "clip_ratio/high_mean": 0.0012261914052942302, "clip_ratio/low_mean": 0.0009402136620337842, "clip_ratio/low_min": 6.632885288127e-05, "clip_ratio/region_mean": 0.0021664050145773217, "epoch": 1.5832604257801108, "grad_norm": 0.17302879691123962, "learning_rate": 1e-06, "loss": -0.0121, "step": 678 }, { "clip_ratio/high_max": 0.003695393505040556, "clip_ratio/high_mean": 0.0016035606640798505, "clip_ratio/low_mean": 0.0014895543645252474, "clip_ratio/low_min": 0.00013027916429564357, "clip_ratio/region_mean": 0.003093115010415204, "epoch": 1.5855934674832313, "grad_norm": 0.14353615045547485, "learning_rate": 1e-06, "loss": -0.0124, "step": 679 }, { "clip_ratio/high_max": 0.004682419370510615, "clip_ratio/high_mean": 0.0020075191132491454, "clip_ratio/low_mean": 0.0020148561998212244, "clip_ratio/low_min": 0.00022043317949282937, "clip_ratio/region_mean": 0.004022375389467925, "epoch": 1.5879265091863517, "grad_norm": 0.11383099108934402, "learning_rate": 1e-06, "loss": -0.0127, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 773.677490234375, "completions/mean_terminated_length": 581.4769897460938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.5902595508894721, "grad_norm": 0.28525733947753906, "learning_rate": 1e-06, "loss": -0.0155, "num_tokens": 99936810.0, "reward": 0.5892857313156128, "reward_std": 0.16679365932941437, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 681 }, { "clip_ratio/high_max": 0.003151006509142462, "clip_ratio/high_mean": 0.001222032702571596, "clip_ratio/low_mean": 0.0012604854491655715, "clip_ratio/low_min": 2.082430182781536e-05, "clip_ratio/region_mean": 0.0024825181099004112, "epoch": 1.5925925925925926, "grad_norm": 0.19139081239700317, "learning_rate": 1e-06, "loss": -0.0157, "step": 682 }, { "clip_ratio/high_max": 0.004364979802630842, "clip_ratio/high_mean": 0.0016738818449084647, "clip_ratio/low_mean": 0.0018759743397822604, "clip_ratio/low_min": 8.855794294504449e-05, "clip_ratio/region_mean": 0.0035498562283464707, "epoch": 1.594925634295713, "grad_norm": 0.1408088505268097, "learning_rate": 1e-06, "loss": -0.0161, "step": 683 }, { "clip_ratio/high_max": 0.0057402113161515445, "clip_ratio/high_mean": 0.0021023931039962918, "clip_ratio/low_mean": 0.0026208754497929476, "clip_ratio/low_min": 0.00013463620052789338, "clip_ratio/region_mean": 0.0047232685319613665, "epoch": 1.5972586759988334, "grad_norm": 0.11426497250795364, "learning_rate": 1e-06, "loss": -0.0164, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 772.3973388671875, "completions/mean_terminated_length": 661.2271728515625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.599591717701954, "grad_norm": 0.24368947744369507, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 100601846.0, "reward": 0.609375, "reward_std": 0.1873084455728531, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 685 }, { "clip_ratio/high_max": 0.0024836231750668958, "clip_ratio/high_mean": 0.0009393668169650482, "clip_ratio/low_mean": 0.0011657883878797293, "clip_ratio/low_min": 0.00013321530332177645, "clip_ratio/region_mean": 0.0021051551739219576, "epoch": 1.6019247594050743, "grad_norm": 0.1742807924747467, "learning_rate": 1e-06, "loss": 0.0112, "step": 686 }, { "clip_ratio/high_max": 0.003526618704199791, "clip_ratio/high_mean": 0.001331138021669176, "clip_ratio/low_mean": 0.0017143283475888893, "clip_ratio/low_min": 0.0002011150081671076, "clip_ratio/region_mean": 0.0030454663647105917, "epoch": 1.604257801108195, "grad_norm": 0.1398981660604477, "learning_rate": 1e-06, "loss": 0.0108, "step": 687 }, { "clip_ratio/high_max": 0.004449798339919653, "clip_ratio/high_mean": 0.001675060322668287, "clip_ratio/low_mean": 0.0022632075342698954, "clip_ratio/low_min": 0.00030479050838039257, "clip_ratio/region_mean": 0.003938267793273553, "epoch": 1.6065908428113151, "grad_norm": 0.11432594805955887, "learning_rate": 1e-06, "loss": 0.0106, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 710.9642944335938, "completions/mean_terminated_length": 561.0443115234375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 1.6089238845144358, "grad_norm": 0.25037357211112976, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 101163670.0, "reward": 0.6328125, "reward_std": 0.18088439106941223, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 689 }, { "clip_ratio/high_max": 0.003082485702179838, "clip_ratio/high_mean": 0.0011470698118500877, "clip_ratio/low_mean": 0.0009345052612843574, "clip_ratio/low_min": 3.389370976947248e-05, "clip_ratio/region_mean": 0.00208157504312112, "epoch": 1.611256926217556, "grad_norm": 0.229627326130867, "learning_rate": 1e-06, "loss": -0.0107, "step": 690 }, { "clip_ratio/high_max": 0.004096759861567989, "clip_ratio/high_mean": 0.0016260844822681975, "clip_ratio/low_mean": 0.0015687161176174413, "clip_ratio/low_min": 0.00010149399167858064, "clip_ratio/region_mean": 0.0031948006217135116, "epoch": 1.6135899679206767, "grad_norm": 0.14112234115600586, "learning_rate": 1e-06, "loss": -0.0111, "step": 691 }, { "clip_ratio/high_max": 0.005045362006057985, "clip_ratio/high_mean": 0.0020410839351825416, "clip_ratio/low_mean": 0.002241917962237494, "clip_ratio/low_min": 0.00015252169396262616, "clip_ratio/region_mean": 0.004283001879230142, "epoch": 1.6159230096237969, "grad_norm": 0.11660942435264587, "learning_rate": 1e-06, "loss": -0.0114, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 773.2589721679688, "completions/mean_terminated_length": 585.1792602539062, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.6182560513269175, "grad_norm": 0.2728084623813629, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 101750214.0, "reward": 0.5926339626312256, "reward_std": 0.20654867589473724, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 693 }, { "clip_ratio/high_max": 0.0029227424602140673, "clip_ratio/high_mean": 0.0013024689178564586, "clip_ratio/low_mean": 0.0012634358063223772, "clip_ratio/low_min": 0.000178847858478548, "clip_ratio/region_mean": 0.002565904753282666, "epoch": 1.620589093030038, "grad_norm": 0.29820919036865234, "learning_rate": 1e-06, "loss": -0.0228, "step": 694 }, { "clip_ratio/high_max": 0.0037310707702999935, "clip_ratio/high_mean": 0.0017264762209379114, "clip_ratio/low_mean": 0.0018601285410113633, "clip_ratio/low_min": 0.0002608320392027963, "clip_ratio/region_mean": 0.003586604754673317, "epoch": 1.6229221347331584, "grad_norm": 0.14371053874492645, "learning_rate": 1e-06, "loss": -0.0233, "step": 695 }, { "clip_ratio/high_max": 0.004788444901350886, "clip_ratio/high_mean": 0.0021462989825522527, "clip_ratio/low_mean": 0.0026394200831418857, "clip_ratio/low_min": 0.00040253415863844566, "clip_ratio/region_mean": 0.004785719065694138, "epoch": 1.6252551764362788, "grad_norm": 0.12184319645166397, "learning_rate": 1e-06, "loss": -0.0235, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3338.0, "completions/mean_length": 753.6406860351562, "completions/mean_terminated_length": 605.6107177734375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.6275882181393992, "grad_norm": 0.24385374784469604, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 102378196.0, "reward": 0.5569196939468384, "reward_std": 0.15213938057422638, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 697 }, { "clip_ratio/high_max": 0.0025684576030471362, "clip_ratio/high_mean": 0.0009102690801228164, "clip_ratio/low_mean": 0.0008276896787720034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017379587079631165, "epoch": 1.6299212598425197, "grad_norm": 0.17472940683364868, "learning_rate": 1e-06, "loss": 0.0027, "step": 698 }, { "clip_ratio/high_max": 0.003790906826907303, "clip_ratio/high_mean": 0.0012977864062122535, "clip_ratio/low_mean": 0.0013074482521915343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026052346438518725, "epoch": 1.63225430154564, "grad_norm": 0.13095299899578094, "learning_rate": 1e-06, "loss": 0.0024, "step": 699 }, { "clip_ratio/high_max": 0.004736442206194624, "clip_ratio/high_mean": 0.0015826626331545413, "clip_ratio/low_mean": 0.0017405366197635885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033231993147637695, "epoch": 1.6345873432487605, "grad_norm": 0.10208440572023392, "learning_rate": 1e-06, "loss": 0.0022, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3317.0, "completions/mean_length": 751.8560791015625, "completions/mean_terminated_length": 632.008056640625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.636920384951881, "grad_norm": 0.2590714991092682, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 103015187.0, "reward": 0.5792410969734192, "reward_std": 0.18475648760795593, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 701 }, { "clip_ratio/high_max": 0.0029010725775151514, "clip_ratio/high_mean": 0.001204462634632364, "clip_ratio/low_mean": 0.0010697117486415664, "clip_ratio/low_min": 5.740711458201986e-05, "clip_ratio/region_mean": 0.0022741744032828137, "epoch": 1.6392534266550016, "grad_norm": 0.20327256619930267, "learning_rate": 1e-06, "loss": -0.003, "step": 702 }, { "clip_ratio/high_max": 0.003871840570354834, "clip_ratio/high_mean": 0.0015929009459796362, "clip_ratio/low_mean": 0.0015724249969935045, "clip_ratio/low_min": 0.00010001105147239286, "clip_ratio/region_mean": 0.003165325993904844, "epoch": 1.6415864683581218, "grad_norm": 0.1429632306098938, "learning_rate": 1e-06, "loss": -0.0034, "step": 703 }, { "clip_ratio/high_max": 0.004779217852046713, "clip_ratio/high_mean": 0.001964239861990791, "clip_ratio/low_mean": 0.0022544344355992507, "clip_ratio/low_min": 0.00013561344712798018, "clip_ratio/region_mean": 0.0042186744103673846, "epoch": 1.6439195100612425, "grad_norm": 0.1153990849852562, "learning_rate": 1e-06, "loss": -0.0037, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3181.0, "completions/mean_length": 749.8560791015625, "completions/mean_terminated_length": 613.8339233398438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.6462525517643627, "grad_norm": 0.2646535634994507, "learning_rate": 1e-06, "loss": -0.005, "num_tokens": 103634314.0, "reward": 0.5546875, "reward_std": 0.1880227029323578, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 705 }, { "clip_ratio/high_max": 0.0028080921874789055, "clip_ratio/high_mean": 0.0011137539258925244, "clip_ratio/low_mean": 0.000992838342426694, "clip_ratio/low_min": 3.410951467230916e-05, "clip_ratio/region_mean": 0.0021065922373963986, "epoch": 1.6485855934674833, "grad_norm": 0.18400846421718597, "learning_rate": 1e-06, "loss": -0.0052, "step": 706 }, { "clip_ratio/high_max": 0.0035435195241007023, "clip_ratio/high_mean": 0.0014770448869967368, "clip_ratio/low_mean": 0.001655503801885061, "clip_ratio/low_min": 5.582768062595278e-05, "clip_ratio/region_mean": 0.003132548605208285, "epoch": 1.6509186351706036, "grad_norm": 0.13828006386756897, "learning_rate": 1e-06, "loss": -0.0055, "step": 707 }, { "clip_ratio/high_max": 0.0042818090951186605, "clip_ratio/high_mean": 0.0018058502100757323, "clip_ratio/low_mean": 0.0022060039118514396, "clip_ratio/low_min": 9.160281842923723e-05, "clip_ratio/region_mean": 0.004011854136479087, "epoch": 1.6532516768737242, "grad_norm": 0.11974550783634186, "learning_rate": 1e-06, "loss": -0.0058, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 724.3035888671875, "completions/mean_terminated_length": 570.8658447265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.6555847185768444, "grad_norm": 0.2521364092826843, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 104207002.0, "reward": 0.629464328289032, "reward_std": 0.17085009813308716, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 709 }, { "clip_ratio/high_max": 0.00274680648726644, "clip_ratio/high_mean": 0.0011662962187983794, "clip_ratio/low_mean": 0.000847442392114317, "clip_ratio/low_min": 1.4667918549093883e-05, "clip_ratio/region_mean": 0.0020137386527494527, "epoch": 1.657917760279965, "grad_norm": 0.18540804088115692, "learning_rate": 1e-06, "loss": -0.0153, "step": 710 }, { "clip_ratio/high_max": 0.0032401318894699216, "clip_ratio/high_mean": 0.0014489531822619028, "clip_ratio/low_mean": 0.001390895804433967, "clip_ratio/low_min": 2.9335837098187767e-05, "clip_ratio/region_mean": 0.002839849032170605, "epoch": 1.6602508019830855, "grad_norm": 0.12968800961971283, "learning_rate": 1e-06, "loss": -0.0156, "step": 711 }, { "clip_ratio/high_max": 0.004254725223290734, "clip_ratio/high_mean": 0.0017713863489916548, "clip_ratio/low_mean": 0.0019297860853839666, "clip_ratio/low_min": 5.8671674196375534e-05, "clip_ratio/region_mean": 0.0037011724489275366, "epoch": 1.662583843686206, "grad_norm": 0.12188668549060822, "learning_rate": 1e-06, "loss": -0.0158, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1967.0, "completions/mean_length": 702.1272583007812, "completions/mean_terminated_length": 555.9417724609375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.6649168853893264, "grad_norm": 0.28254270553588867, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 104766740.0, "reward": 0.6037946939468384, "reward_std": 0.18516835570335388, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 713 }, { "clip_ratio/high_max": 0.002944815256341826, "clip_ratio/high_mean": 0.001141206670581596, "clip_ratio/low_mean": 0.0010575862652331125, "clip_ratio/low_min": 6.439091703214217e-05, "clip_ratio/region_mean": 0.002198792928538751, "epoch": 1.6672499270924468, "grad_norm": 0.19902311265468597, "learning_rate": 1e-06, "loss": -0.0115, "step": 714 }, { "clip_ratio/high_max": 0.004043947141326498, "clip_ratio/high_mean": 0.0015545924870821182, "clip_ratio/low_mean": 0.001716645056148991, "clip_ratio/low_min": 0.00014237631876312662, "clip_ratio/region_mean": 0.0032712375541450456, "epoch": 1.6695829687955672, "grad_norm": 0.13362081348896027, "learning_rate": 1e-06, "loss": -0.0119, "step": 715 }, { "clip_ratio/high_max": 0.004989767548977397, "clip_ratio/high_mean": 0.0019327789013914298, "clip_ratio/low_mean": 0.0023398847624775954, "clip_ratio/low_min": 0.00024805239809211344, "clip_ratio/region_mean": 0.004272663631127216, "epoch": 1.6719160104986877, "grad_norm": 0.11047463864088058, "learning_rate": 1e-06, "loss": -0.0121, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 754.0848388671875, "completions/mean_terminated_length": 581.4976806640625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.674249052201808, "grad_norm": 0.270469069480896, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 105349080.0, "reward": 0.6015625, "reward_std": 0.18568874895572662, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 717 }, { "clip_ratio/high_max": 0.003345504264871124, "clip_ratio/high_mean": 0.001276089114981005, "clip_ratio/low_mean": 0.0011062192224926548, "clip_ratio/low_min": 3.243383616791107e-05, "clip_ratio/region_mean": 0.0023823082810849883, "epoch": 1.6765820939049285, "grad_norm": 0.1938450187444687, "learning_rate": 1e-06, "loss": -0.0333, "step": 718 }, { "clip_ratio/high_max": 0.004337899612437468, "clip_ratio/high_mean": 0.0016962489826255478, "clip_ratio/low_mean": 0.0016802002719487064, "clip_ratio/low_min": 4.984815859643277e-05, "clip_ratio/region_mean": 0.003376449312781915, "epoch": 1.678915135608049, "grad_norm": 0.1568206250667572, "learning_rate": 1e-06, "loss": -0.0337, "step": 719 }, { "clip_ratio/high_max": 0.005719614870031364, "clip_ratio/high_mean": 0.0022415937783080153, "clip_ratio/low_mean": 0.0023315205617109314, "clip_ratio/low_min": 4.183792589174118e-05, "clip_ratio/region_mean": 0.004573114289087243, "epoch": 1.6812481773111694, "grad_norm": 0.12140702456235886, "learning_rate": 1e-06, "loss": -0.034, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 743.6328735351562, "completions/mean_terminated_length": 595.15966796875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.68358121901429, "grad_norm": 0.33311599493026733, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 105942575.0, "reward": 0.6305803656578064, "reward_std": 0.1674337536096573, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 721 }, { "clip_ratio/high_max": 0.0028087594982935116, "clip_ratio/high_mean": 0.0010452608912601136, "clip_ratio/low_mean": 0.0009563301027810667, "clip_ratio/low_min": 8.778894698480144e-05, "clip_ratio/region_mean": 0.0020015910049551167, "epoch": 1.6859142607174102, "grad_norm": 0.19226394593715668, "learning_rate": 1e-06, "loss": 0.0003, "step": 722 }, { "clip_ratio/high_max": 0.003739192477951292, "clip_ratio/high_mean": 0.0014672776123916265, "clip_ratio/low_mean": 0.0014400067520909943, "clip_ratio/low_min": 5.7281764384242706e-05, "clip_ratio/region_mean": 0.002907284360844642, "epoch": 1.688247302420531, "grad_norm": 0.14065328240394592, "learning_rate": 1e-06, "loss": -0.0001, "step": 723 }, { "clip_ratio/high_max": 0.004647549809305929, "clip_ratio/high_mean": 0.0018880515854107216, "clip_ratio/low_mean": 0.00196352428974933, "clip_ratio/low_min": 9.871604925137945e-05, "clip_ratio/region_mean": 0.0038515758205903694, "epoch": 1.690580344123651, "grad_norm": 0.11352507770061493, "learning_rate": 1e-06, "loss": -0.0003, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 862.6373291015625, "completions/mean_terminated_length": 663.4253540039062, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.6929133858267718, "grad_norm": 0.2704397737979889, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 106607010.0, "reward": 0.515625, "reward_std": 0.16781283915042877, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 725 }, { "clip_ratio/high_max": 0.0026500075873627793, "clip_ratio/high_mean": 0.001005624722893117, "clip_ratio/low_mean": 0.0010397373553132638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002045362060016487, "epoch": 1.695246427529892, "grad_norm": 0.1803724467754364, "learning_rate": 1e-06, "loss": -0.0134, "step": 726 }, { "clip_ratio/high_max": 0.003607823462516535, "clip_ratio/high_mean": 0.0014111534546827897, "clip_ratio/low_mean": 0.0015317153556679841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029428688212647103, "epoch": 1.6975794692330126, "grad_norm": 0.1344105452299118, "learning_rate": 1e-06, "loss": -0.0137, "step": 727 }, { "clip_ratio/high_max": 0.004246437216352206, "clip_ratio/high_mean": 0.0017214626932400279, "clip_ratio/low_mean": 0.001986810253583826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003708272924995981, "epoch": 1.6999125109361328, "grad_norm": 0.10835554450750351, "learning_rate": 1e-06, "loss": -0.0139, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 671.021240234375, "completions/mean_terminated_length": 540.054443359375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.7022455526392535, "grad_norm": 0.29257550835609436, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 107168853.0, "reward": 0.6473214626312256, "reward_std": 0.1776086390018463, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 729 }, { "clip_ratio/high_max": 0.00364314422768075, "clip_ratio/high_mean": 0.001345762051641941, "clip_ratio/low_mean": 0.0012476648353185738, "clip_ratio/low_min": 9.697439963929355e-06, "clip_ratio/region_mean": 0.0025934268851415254, "epoch": 1.704578594342374, "grad_norm": 0.19777247309684753, "learning_rate": 1e-06, "loss": -0.008, "step": 730 }, { "clip_ratio/high_max": 0.004978687400580384, "clip_ratio/high_mean": 0.0018298692812095396, "clip_ratio/low_mean": 0.0019838477928715292, "clip_ratio/low_min": 1.939487992785871e-05, "clip_ratio/region_mean": 0.003813716975855641, "epoch": 1.7069116360454943, "grad_norm": 0.17871196568012238, "learning_rate": 1e-06, "loss": -0.0084, "step": 731 }, { "clip_ratio/high_max": 0.0060547338289325126, "clip_ratio/high_mean": 0.0022569881220988464, "clip_ratio/low_mean": 0.0026389037921035197, "clip_ratio/low_min": 2.9092319891788065e-05, "clip_ratio/region_mean": 0.0048958919942379, "epoch": 1.7092446777486148, "grad_norm": 0.12625852227210999, "learning_rate": 1e-06, "loss": -0.0087, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 749.8125610351562, "completions/mean_terminated_length": 621.858642578125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.7115777194517352, "grad_norm": 0.3048950135707855, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 107798365.0, "reward": 0.59375, "reward_std": 0.17134752869606018, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 733 }, { "clip_ratio/high_max": 0.0032380399279645644, "clip_ratio/high_mean": 0.0012192441790830344, "clip_ratio/low_mean": 0.001155765523435548, "clip_ratio/low_min": 2.2760379579267465e-05, "clip_ratio/region_mean": 0.0023750097388983704, "epoch": 1.7139107611548556, "grad_norm": 0.19789312779903412, "learning_rate": 1e-06, "loss": -0.0038, "step": 734 }, { "clip_ratio/high_max": 0.004513056293944828, "clip_ratio/high_mean": 0.001639994436118286, "clip_ratio/low_mean": 0.0017721140447974904, "clip_ratio/low_min": 4.552075915853493e-05, "clip_ratio/region_mean": 0.003412108497286681, "epoch": 1.716243802857976, "grad_norm": 0.15578952431678772, "learning_rate": 1e-06, "loss": -0.0041, "step": 735 }, { "clip_ratio/high_max": 0.005259309822577052, "clip_ratio/high_mean": 0.0019401261015445925, "clip_ratio/low_mean": 0.002472073130775243, "clip_ratio/low_min": 5.6900946219684556e-05, "clip_ratio/region_mean": 0.0044121992905274965, "epoch": 1.7185768445610965, "grad_norm": 0.12054499238729477, "learning_rate": 1e-06, "loss": -0.0044, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2111.0, "completions/mean_length": 716.3582763671875, "completions/mean_terminated_length": 595.2381591796875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.720909886264217, "grad_norm": 0.21959708631038666, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 108399030.0, "reward": 0.5814732313156128, "reward_std": 0.13354277610778809, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 737 }, { "clip_ratio/high_max": 0.0018732217722572386, "clip_ratio/high_mean": 0.0007269262841873569, "clip_ratio/low_mean": 0.0007380915867543081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014650178673036862, "epoch": 1.7232429279673376, "grad_norm": 0.15491065382957458, "learning_rate": 1e-06, "loss": 0.0058, "step": 738 }, { "clip_ratio/high_max": 0.002366788437939249, "clip_ratio/high_mean": 0.0009086417776416056, "clip_ratio/low_mean": 0.0010985854769387515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002007227249123389, "epoch": 1.7255759696704578, "grad_norm": 0.11508391052484512, "learning_rate": 1e-06, "loss": 0.0056, "step": 739 }, { "clip_ratio/high_max": 0.002898280421504751, "clip_ratio/high_mean": 0.0011356428331055213, "clip_ratio/low_mean": 0.001485578603023896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026212213415419683, "epoch": 1.7279090113735784, "grad_norm": 0.09881661832332611, "learning_rate": 1e-06, "loss": 0.0054, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3546.0, "completions/mean_length": 765.3861694335938, "completions/mean_terminated_length": 613.8179931640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.7302420530766986, "grad_norm": 0.25159674882888794, "learning_rate": 1e-06, "loss": -0.0085, "num_tokens": 109016920.0, "reward": 0.5524553656578064, "reward_std": 0.16784563660621643, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 741 }, { "clip_ratio/high_max": 0.002551504505390767, "clip_ratio/high_mean": 0.0010875955049414188, "clip_ratio/low_mean": 0.0008287665295938496, "clip_ratio/low_min": 1.1741499292838853e-05, "clip_ratio/region_mean": 0.0019163620381732471, "epoch": 1.7325750947798193, "grad_norm": 0.18649883568286896, "learning_rate": 1e-06, "loss": -0.0086, "step": 742 }, { "clip_ratio/high_max": 0.0034047478256979957, "clip_ratio/high_mean": 0.0014742709063284565, "clip_ratio/low_mean": 0.001306191352341557, "clip_ratio/low_min": 1.1741499292838853e-05, "clip_ratio/region_mean": 0.0027804622804978862, "epoch": 1.7349081364829395, "grad_norm": 0.1338178664445877, "learning_rate": 1e-06, "loss": -0.0089, "step": 743 }, { "clip_ratio/high_max": 0.0040784029988572, "clip_ratio/high_mean": 0.001775572614860721, "clip_ratio/low_mean": 0.0017559754924150184, "clip_ratio/low_min": 2.3482998585677706e-05, "clip_ratio/region_mean": 0.0035315481363795698, "epoch": 1.7372411781860602, "grad_norm": 0.11057519912719727, "learning_rate": 1e-06, "loss": -0.0091, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2048.0, "completions/mean_length": 690.8683471679688, "completions/mean_terminated_length": 564.7523193359375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.7395742198891804, "grad_norm": 0.27115586400032043, "learning_rate": 1e-06, "loss": -0.0203, "num_tokens": 109592610.0, "reward": 0.6473214626312256, "reward_std": 0.1622154414653778, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 745 }, { "clip_ratio/high_max": 0.003116787971521262, "clip_ratio/high_mean": 0.0011823594522866188, "clip_ratio/low_mean": 0.0011809444877144415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002363303952733986, "epoch": 1.741907261592301, "grad_norm": 0.185161292552948, "learning_rate": 1e-06, "loss": -0.0204, "step": 746 }, { "clip_ratio/high_max": 0.003821617483481532, "clip_ratio/high_mean": 0.001523124399682274, "clip_ratio/low_mean": 0.0017286808506469242, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032518052248633467, "epoch": 1.7442403032954215, "grad_norm": 0.13996219635009766, "learning_rate": 1e-06, "loss": -0.0208, "step": 747 }, { "clip_ratio/high_max": 0.005425166207714938, "clip_ratio/high_mean": 0.0019601670865085907, "clip_ratio/low_mean": 0.0023750387408654206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004335205900133587, "epoch": 1.7465733449985419, "grad_norm": 0.1147933080792427, "learning_rate": 1e-06, "loss": -0.021, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 803.4844360351562, "completions/mean_terminated_length": 637.5076293945312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.7489063867016623, "grad_norm": 0.27062752842903137, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 110227420.0, "reward": 0.5736607313156128, "reward_std": 0.19490069150924683, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 749 }, { "clip_ratio/high_max": 0.003249353416322265, "clip_ratio/high_mean": 0.0011485462891869247, "clip_ratio/low_mean": 0.0010337435232941061, "clip_ratio/low_min": 3.435605685808696e-05, "clip_ratio/region_mean": 0.002182289848860819, "epoch": 1.7512394284047827, "grad_norm": 0.19298133254051208, "learning_rate": 1e-06, "loss": -0.0182, "step": 750 }, { "clip_ratio/high_max": 0.004096483316970989, "clip_ratio/high_mean": 0.001508465240476653, "clip_ratio/low_mean": 0.0015127087172004394, "clip_ratio/low_min": 4.5040649638394825e-05, "clip_ratio/region_mean": 0.003021174023160711, "epoch": 1.7535724701079032, "grad_norm": 0.14680033922195435, "learning_rate": 1e-06, "loss": -0.0185, "step": 751 }, { "clip_ratio/high_max": 0.005126409174408764, "clip_ratio/high_mean": 0.0018541174576967023, "clip_ratio/low_mean": 0.002137948678864632, "clip_ratio/low_min": 6.937424041097984e-05, "clip_ratio/region_mean": 0.003992066136561334, "epoch": 1.7559055118110236, "grad_norm": 0.11550495028495789, "learning_rate": 1e-06, "loss": -0.0188, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 797.513427734375, "completions/mean_terminated_length": 619.0070190429688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.758238553514144, "grad_norm": 0.27915501594543457, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 110850264.0, "reward": 0.5859375, "reward_std": 0.18167605996131897, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 753 }, { "clip_ratio/high_max": 0.003047874677577056, "clip_ratio/high_mean": 0.0012930156135553261, "clip_ratio/low_mean": 0.0009512596461718203, "clip_ratio/low_min": 3.394279337953776e-05, "clip_ratio/region_mean": 0.0022442752597271465, "epoch": 1.7605715952172645, "grad_norm": 0.1849624365568161, "learning_rate": 1e-06, "loss": -0.0229, "step": 754 }, { "clip_ratio/high_max": 0.004170355074165855, "clip_ratio/high_mean": 0.0017300570543739013, "clip_ratio/low_mean": 0.0014139776139927562, "clip_ratio/low_min": 2.192982537962962e-05, "clip_ratio/region_mean": 0.0031440346865565516, "epoch": 1.762904636920385, "grad_norm": 0.13482032716274261, "learning_rate": 1e-06, "loss": -0.0233, "step": 755 }, { "clip_ratio/high_max": 0.005111737256811466, "clip_ratio/high_mean": 0.0020774043769051787, "clip_ratio/low_mean": 0.0019421477991272695, "clip_ratio/low_min": 4.722684025182389e-05, "clip_ratio/region_mean": 0.004019552186946385, "epoch": 1.7652376786235053, "grad_norm": 0.1130194142460823, "learning_rate": 1e-06, "loss": -0.0235, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 831.7600708007812, "completions/mean_terminated_length": 655.1070556640625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.767570720326626, "grad_norm": 0.26193535327911377, "learning_rate": 1e-06, "loss": -0.0141, "num_tokens": 111491889.0, "reward": 0.5725446939468384, "reward_std": 0.17682795226573944, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 757 }, { "clip_ratio/high_max": 0.002799454246996902, "clip_ratio/high_mean": 0.0010941377404378727, "clip_ratio/low_mean": 0.000876436677572201, "clip_ratio/low_min": 0.00010664550791261718, "clip_ratio/region_mean": 0.0019705743761733174, "epoch": 1.7699037620297462, "grad_norm": 0.17558331787586212, "learning_rate": 1e-06, "loss": -0.0142, "step": 758 }, { "clip_ratio/high_max": 0.003609936946304515, "clip_ratio/high_mean": 0.001478749378293287, "clip_ratio/low_mean": 0.0012972238200745778, "clip_ratio/low_min": 0.00015196781532722525, "clip_ratio/region_mean": 0.0027759732183767483, "epoch": 1.7722368037328668, "grad_norm": 0.1327654868364334, "learning_rate": 1e-06, "loss": -0.0146, "step": 759 }, { "clip_ratio/high_max": 0.004393696624902077, "clip_ratio/high_mean": 0.0018373758939560503, "clip_ratio/low_mean": 0.0018494570686016232, "clip_ratio/low_min": 0.0001798082812456414, "clip_ratio/region_mean": 0.0036868329189019278, "epoch": 1.774569845435987, "grad_norm": 0.10718127340078354, "learning_rate": 1e-06, "loss": -0.0148, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 770.0145263671875, "completions/mean_terminated_length": 569.2745361328125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.7769028871391077, "grad_norm": 0.22143039107322693, "learning_rate": 1e-06, "loss": -0.0161, "num_tokens": 112063278.0, "reward": 0.6272321939468384, "reward_std": 0.14004239439964294, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 761 }, { "clip_ratio/high_max": 0.0021692396585422102, "clip_ratio/high_mean": 0.0008811227298792801, "clip_ratio/low_mean": 0.0007433366863551782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016244594153249636, "epoch": 1.779235928842228, "grad_norm": 0.1757214516401291, "learning_rate": 1e-06, "loss": -0.0162, "step": 762 }, { "clip_ratio/high_max": 0.0026406293545733206, "clip_ratio/high_mean": 0.0010768076735985233, "clip_ratio/low_mean": 0.0010994964086421533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002176304074964719, "epoch": 1.7815689705453486, "grad_norm": 0.11132561415433884, "learning_rate": 1e-06, "loss": -0.0164, "step": 763 }, { "clip_ratio/high_max": 0.00346944757620804, "clip_ratio/high_mean": 0.0013227682393335272, "clip_ratio/low_mean": 0.0015044467327243183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028272150084376335, "epoch": 1.7839020122484688, "grad_norm": 0.09376361221075058, "learning_rate": 1e-06, "loss": -0.0166, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 735.7701416015625, "completions/mean_terminated_length": 582.8541259765625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.7862350539515894, "grad_norm": 0.3067696988582611, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 112646432.0, "reward": 0.6171875, "reward_std": 0.20072630047798157, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 765 }, { "clip_ratio/high_max": 0.002782420961011667, "clip_ratio/high_mean": 0.0012823366232623812, "clip_ratio/low_mean": 0.0009907475196087034, "clip_ratio/low_min": 2.3955539290909655e-05, "clip_ratio/region_mean": 0.0022730841592419893, "epoch": 1.7885680956547099, "grad_norm": 0.20320212841033936, "learning_rate": 1e-06, "loss": -0.0331, "step": 766 }, { "clip_ratio/high_max": 0.004337506521551404, "clip_ratio/high_mean": 0.0018440278618072625, "clip_ratio/low_mean": 0.0015268329698301386, "clip_ratio/low_min": 4.851229095947929e-05, "clip_ratio/region_mean": 0.003370860853465274, "epoch": 1.7909011373578303, "grad_norm": 0.13549771904945374, "learning_rate": 1e-06, "loss": -0.0336, "step": 767 }, { "clip_ratio/high_max": 0.005815367185277864, "clip_ratio/high_mean": 0.00240308532374911, "clip_ratio/low_mean": 0.002059374797681812, "clip_ratio/low_min": 8.085381705313921e-05, "clip_ratio/region_mean": 0.0044624600996030495, "epoch": 1.7932341790609507, "grad_norm": 0.11234822124242783, "learning_rate": 1e-06, "loss": -0.0338, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3529.0, "completions/mean_length": 893.8203735351562, "completions/mean_terminated_length": 676.2705688476562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.7955672207640712, "grad_norm": 0.2709302604198456, "learning_rate": 1e-06, "loss": -0.0282, "num_tokens": 113306319.0, "reward": 0.5323660969734192, "reward_std": 0.168941468000412, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 769 }, { "clip_ratio/high_max": 0.0027927862756769173, "clip_ratio/high_mean": 0.001099945991882123, "clip_ratio/low_mean": 0.0011486962666822365, "clip_ratio/low_min": 9.515179772279225e-05, "clip_ratio/region_mean": 0.002248642274935264, "epoch": 1.7979002624671916, "grad_norm": 0.21613603830337524, "learning_rate": 1e-06, "loss": -0.0283, "step": 770 }, { "clip_ratio/high_max": 0.003521000991895562, "clip_ratio/high_mean": 0.0014837128492217744, "clip_ratio/low_mean": 0.0017250511627935339, "clip_ratio/low_min": 0.00015399080803035758, "clip_ratio/region_mean": 0.0032087640138342977, "epoch": 1.800233304170312, "grad_norm": 0.1708793044090271, "learning_rate": 1e-06, "loss": -0.0286, "step": 771 }, { "clip_ratio/high_max": 0.004785408447787631, "clip_ratio/high_mean": 0.0019640610480564646, "clip_ratio/low_mean": 0.002234876717920997, "clip_ratio/low_min": 0.00022172122407937422, "clip_ratio/region_mean": 0.0041989377059508115, "epoch": 1.8025663458734325, "grad_norm": 0.1300148367881775, "learning_rate": 1e-06, "loss": -0.0289, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2555.0, "completions/mean_length": 783.5625610351562, "completions/mean_terminated_length": 554.3007202148438, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.8048993875765529, "grad_norm": 0.27698004245758057, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 113856279.0, "reward": 0.5970982313156128, "reward_std": 0.15883007645606995, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 773 }, { "clip_ratio/high_max": 0.0030652988170913886, "clip_ratio/high_mean": 0.001116118703976099, "clip_ratio/low_mean": 0.0010056811152026057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021217998073552735, "epoch": 1.8072324292796735, "grad_norm": 0.18501514196395874, "learning_rate": 1e-06, "loss": -0.0341, "step": 774 }, { "clip_ratio/high_max": 0.004217317997245118, "clip_ratio/high_mean": 0.0015465038595721126, "clip_ratio/low_mean": 0.0014114981568127405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029580020054709166, "epoch": 1.8095654709827937, "grad_norm": 0.14529399573802948, "learning_rate": 1e-06, "loss": -0.0344, "step": 775 }, { "clip_ratio/high_max": 0.005173735175048932, "clip_ratio/high_mean": 0.001916931381856557, "clip_ratio/low_mean": 0.002039694278209936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003956625616410747, "epoch": 1.8118985126859144, "grad_norm": 0.12197954952716827, "learning_rate": 1e-06, "loss": -0.0346, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3411.0, "completions/mean_length": 698.6741333007812, "completions/mean_terminated_length": 580.9838256835938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 1.8142315543890346, "grad_norm": 0.3075293302536011, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 114443795.0, "reward": 0.6026785969734192, "reward_std": 0.17126448452472687, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961684107780457, "step": 777 }, { "clip_ratio/high_max": 0.004181560427241493, "clip_ratio/high_mean": 0.0015780921348778065, "clip_ratio/low_mean": 0.0009664074750617146, "clip_ratio/low_min": 6.724719241901767e-05, "clip_ratio/region_mean": 0.0025444996208534576, "epoch": 1.8165645960921553, "grad_norm": 0.24718306958675385, "learning_rate": 1e-06, "loss": -0.0095, "step": 778 }, { "clip_ratio/high_max": 0.005663218078552745, "clip_ratio/high_mean": 0.0020682055401266553, "clip_ratio/low_mean": 0.0015344222192652524, "clip_ratio/low_min": 8.059048741415609e-05, "clip_ratio/region_mean": 0.003602627730288077, "epoch": 1.8188976377952755, "grad_norm": 0.15628652274608612, "learning_rate": 1e-06, "loss": -0.0099, "step": 779 }, { "clip_ratio/high_max": 0.0064851664792513475, "clip_ratio/high_mean": 0.0024214475488406606, "clip_ratio/low_mean": 0.0020968000462744385, "clip_ratio/low_min": 9.393378240929451e-05, "clip_ratio/region_mean": 0.004518247733358294, "epoch": 1.8212306794983961, "grad_norm": 0.12898625433444977, "learning_rate": 1e-06, "loss": -0.0101, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2121.0, "completions/mean_length": 758.2678833007812, "completions/mean_terminated_length": 585.896728515625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.8235637212015163, "grad_norm": 0.2408958375453949, "learning_rate": 1e-06, "loss": -0.0131, "num_tokens": 115025851.0, "reward": 0.6328125, "reward_std": 0.15326592326164246, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 781 }, { "clip_ratio/high_max": 0.0027655972589855082, "clip_ratio/high_mean": 0.0009400304243172286, "clip_ratio/low_mean": 0.0008918654930312186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018318958864256274, "epoch": 1.825896762904637, "grad_norm": 0.16706512868404388, "learning_rate": 1e-06, "loss": -0.0132, "step": 782 }, { "clip_ratio/high_max": 0.003754158693482168, "clip_ratio/high_mean": 0.0013101326039759442, "clip_ratio/low_mean": 0.0013702956530323718, "clip_ratio/low_min": 2.7663211767503526e-05, "clip_ratio/region_mean": 0.0026804282824741676, "epoch": 1.8282298046077574, "grad_norm": 0.1312631070613861, "learning_rate": 1e-06, "loss": -0.0135, "step": 783 }, { "clip_ratio/high_max": 0.004378141587949358, "clip_ratio/high_mean": 0.0015455379962077131, "clip_ratio/low_mean": 0.0018427088398311753, "clip_ratio/low_min": 4.3152679609193e-05, "clip_ratio/region_mean": 0.003388246761460323, "epoch": 1.8305628463108778, "grad_norm": 0.10174086689949036, "learning_rate": 1e-06, "loss": -0.0137, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3598.0, "completions/mean_length": 866.2254638671875, "completions/mean_terminated_length": 667.234619140625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 1.8328958880139983, "grad_norm": 0.2835047245025635, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 115689805.0, "reward": 0.4687500298023224, "reward_std": 0.1743106245994568, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 785 }, { "clip_ratio/high_max": 0.0027818107992061414, "clip_ratio/high_mean": 0.001033648846714641, "clip_ratio/low_mean": 0.0011188557982677594, "clip_ratio/low_min": 0.00012293513191252714, "clip_ratio/region_mean": 0.002152504661353305, "epoch": 1.8352289297171187, "grad_norm": 0.26034629344940186, "learning_rate": 1e-06, "loss": -0.0306, "step": 786 }, { "clip_ratio/high_max": 0.003687291595269926, "clip_ratio/high_mean": 0.0013959026164229726, "clip_ratio/low_mean": 0.0015357494266936556, "clip_ratio/low_min": 0.00023547612727270462, "clip_ratio/region_mean": 0.0029316519940039143, "epoch": 1.8375619714202391, "grad_norm": 0.14828462898731232, "learning_rate": 1e-06, "loss": -0.031, "step": 787 }, { "clip_ratio/high_max": 0.004551688049104996, "clip_ratio/high_mean": 0.0016692456665623467, "clip_ratio/low_mean": 0.0021169405881664716, "clip_ratio/low_min": 0.000257010520726908, "clip_ratio/region_mean": 0.0037861862365389243, "epoch": 1.8398950131233596, "grad_norm": 0.10793136060237885, "learning_rate": 1e-06, "loss": -0.0312, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 742.8761596679688, "completions/mean_terminated_length": 565.56640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.84222805482648, "grad_norm": 0.34630733728408813, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 116266638.0, "reward": 0.5379464626312256, "reward_std": 0.186038538813591, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 789 }, { "clip_ratio/high_max": 0.0033699140694807284, "clip_ratio/high_mean": 0.0012714220247289632, "clip_ratio/low_mean": 0.0013436512963380665, "clip_ratio/low_min": 0.00015755012373119825, "clip_ratio/region_mean": 0.0026150732810492627, "epoch": 1.8445610965296004, "grad_norm": 0.21677204966545105, "learning_rate": 1e-06, "loss": 0.0028, "step": 790 }, { "clip_ratio/high_max": 0.004822187824174762, "clip_ratio/high_mean": 0.0017499806635896675, "clip_ratio/low_mean": 0.0019913783471565694, "clip_ratio/low_min": 0.00021530800404434558, "clip_ratio/region_mean": 0.003741358974366449, "epoch": 1.8468941382327209, "grad_norm": 0.154274120926857, "learning_rate": 1e-06, "loss": 0.0024, "step": 791 }, { "clip_ratio/high_max": 0.005698316148482263, "clip_ratio/high_mean": 0.0022303610167000443, "clip_ratio/low_mean": 0.002684169332496822, "clip_ratio/low_min": 0.0002767279693216551, "clip_ratio/region_mean": 0.004914530320093036, "epoch": 1.8492271799358413, "grad_norm": 0.11727022379636765, "learning_rate": 1e-06, "loss": 0.0021, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 737.6160888671875, "completions/mean_terminated_length": 560.0281982421875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 1.851560221638962, "grad_norm": 0.2755446135997772, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 116823790.0, "reward": 0.578125, "reward_std": 0.19505293667316437, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 793 }, { "clip_ratio/high_max": 0.0032081985627883114, "clip_ratio/high_mean": 0.001270226544875186, "clip_ratio/low_mean": 0.0009324903003289364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00220271685248008, "epoch": 1.8538932633420822, "grad_norm": 0.1850334107875824, "learning_rate": 1e-06, "loss": -0.0274, "step": 794 }, { "clip_ratio/high_max": 0.004287674688384868, "clip_ratio/high_mean": 0.001729646981402766, "clip_ratio/low_mean": 0.0014480761547019938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003177723177941516, "epoch": 1.8562263050452028, "grad_norm": 0.1667022556066513, "learning_rate": 1e-06, "loss": -0.0277, "step": 795 }, { "clip_ratio/high_max": 0.005318969197105616, "clip_ratio/high_mean": 0.002139384174370207, "clip_ratio/low_mean": 0.0019494820553518366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004088866298843641, "epoch": 1.858559346748323, "grad_norm": 0.10869462043046951, "learning_rate": 1e-06, "loss": -0.0279, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 743.4855346679688, "completions/mean_terminated_length": 627.3475952148438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.8608923884514437, "grad_norm": 0.23042643070220947, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 117458465.0, "reward": 0.6071428656578064, "reward_std": 0.13973930478096008, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 797 }, { "clip_ratio/high_max": 0.002304195848410018, "clip_ratio/high_mean": 0.0008718851295270724, "clip_ratio/low_mean": 0.000823928028694354, "clip_ratio/low_min": 2.1067177840450313e-05, "clip_ratio/region_mean": 0.001695813120022649, "epoch": 1.8632254301545639, "grad_norm": 0.15301695466041565, "learning_rate": 1e-06, "loss": 0.0019, "step": 798 }, { "clip_ratio/high_max": 0.002941871432994958, "clip_ratio/high_mean": 0.00119379346142523, "clip_ratio/low_mean": 0.00121710902567429, "clip_ratio/low_min": 1.655629057495389e-05, "clip_ratio/region_mean": 0.0024109024998324458, "epoch": 1.8655584718576845, "grad_norm": 0.1183725893497467, "learning_rate": 1e-06, "loss": 0.0016, "step": 799 }, { "clip_ratio/high_max": 0.0037563690857496113, "clip_ratio/high_mean": 0.0014773591919947648, "clip_ratio/low_mean": 0.0016602653086010832, "clip_ratio/low_min": 4.590161370288115e-05, "clip_ratio/region_mean": 0.0031376245606224984, "epoch": 1.8678915135608047, "grad_norm": 0.09654898196458817, "learning_rate": 1e-06, "loss": 0.0015, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 706.3426513671875, "completions/mean_terminated_length": 560.3387451171875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.8702245552639254, "grad_norm": 0.2967449724674225, "learning_rate": 1e-06, "loss": -0.0175, "num_tokens": 118020412.0, "reward": 0.6116071939468384, "reward_std": 0.19265253841876984, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 801 }, { "clip_ratio/high_max": 0.003343998519994784, "clip_ratio/high_mean": 0.0012377463608572725, "clip_ratio/low_mean": 0.0013545118126785383, "clip_ratio/low_min": 0.00010636023216648027, "clip_ratio/region_mean": 0.0025922581626218744, "epoch": 1.8725575969670458, "grad_norm": 0.20814812183380127, "learning_rate": 1e-06, "loss": -0.0176, "step": 802 }, { "clip_ratio/high_max": 0.004715204027888831, "clip_ratio/high_mean": 0.0018345782336837146, "clip_ratio/low_mean": 0.0020733372075483203, "clip_ratio/low_min": 0.00022938732217880897, "clip_ratio/region_mean": 0.0039079153939383104, "epoch": 1.8748906386701663, "grad_norm": 0.15765273571014404, "learning_rate": 1e-06, "loss": -0.0181, "step": 803 }, { "clip_ratio/high_max": 0.00592966526892269, "clip_ratio/high_mean": 0.0022788866845075972, "clip_ratio/low_mean": 0.0027857510212925263, "clip_ratio/low_min": 0.00029554122738773003, "clip_ratio/region_mean": 0.005064637691248208, "epoch": 1.8772236803732867, "grad_norm": 0.13226355612277985, "learning_rate": 1e-06, "loss": -0.0184, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 797.2567138671875, "completions/mean_terminated_length": 643.1098022460938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.8795567220764071, "grad_norm": 0.21105365455150604, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 118650994.0, "reward": 0.5725446939468384, "reward_std": 0.13755826652050018, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 805 }, { "clip_ratio/high_max": 0.0020558973192237318, "clip_ratio/high_mean": 0.0007561522443211288, "clip_ratio/low_mean": 0.0007237213285407051, "clip_ratio/low_min": 3.0438312023761682e-05, "clip_ratio/region_mean": 0.0014798735282965936, "epoch": 1.8818897637795275, "grad_norm": 0.16472026705741882, "learning_rate": 1e-06, "loss": -0.0269, "step": 806 }, { "clip_ratio/high_max": 0.002754494289547438, "clip_ratio/high_mean": 0.0009647542829043232, "clip_ratio/low_mean": 0.0010165352887270274, "clip_ratio/low_min": 4.0025617636274546e-05, "clip_ratio/region_mean": 0.0019812895443465095, "epoch": 1.884222805482648, "grad_norm": 0.11768662929534912, "learning_rate": 1e-06, "loss": -0.0271, "step": 807 }, { "clip_ratio/high_max": 0.0035028094207518734, "clip_ratio/high_mean": 0.001266509183551534, "clip_ratio/low_mean": 0.001315934181548073, "clip_ratio/low_min": 5.0730519433273e-05, "clip_ratio/region_mean": 0.002582443383289501, "epoch": 1.8865558471857684, "grad_norm": 0.09358278661966324, "learning_rate": 1e-06, "loss": -0.0273, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 752.5145263671875, "completions/mean_terminated_length": 592.18359375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.8888888888888888, "grad_norm": 0.30046984553337097, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 119249103.0, "reward": 0.5691964626312256, "reward_std": 0.19828173518180847, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 809 }, { "clip_ratio/high_max": 0.00414633886248339, "clip_ratio/high_mean": 0.0015466453041881323, "clip_ratio/low_mean": 0.0011576873002923094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002704332538996823, "epoch": 1.8912219305920095, "grad_norm": 0.21607856452465057, "learning_rate": 1e-06, "loss": -0.0229, "step": 810 }, { "clip_ratio/high_max": 0.005465991285745986, "clip_ratio/high_mean": 0.0020686952520918567, "clip_ratio/low_mean": 0.001820426095946459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00388912130438257, "epoch": 1.8935549722951297, "grad_norm": 0.16273640096187592, "learning_rate": 1e-06, "loss": -0.0233, "step": 811 }, { "clip_ratio/high_max": 0.006136221811175346, "clip_ratio/high_mean": 0.0025098554688156582, "clip_ratio/low_mean": 0.0024466172690154053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00495647283969447, "epoch": 1.8958880139982504, "grad_norm": 0.13049514591693878, "learning_rate": 1e-06, "loss": -0.0236, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 756.4074096679688, "completions/mean_terminated_length": 616.6104736328125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.8982210557013706, "grad_norm": 0.27620360255241394, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 119876340.0, "reward": 0.6037946939468384, "reward_std": 0.16258707642555237, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 813 }, { "clip_ratio/high_max": 0.0028073974790459033, "clip_ratio/high_mean": 0.001081869464542251, "clip_ratio/low_mean": 0.0011113657437817892, "clip_ratio/low_min": 4.5494121877709404e-05, "clip_ratio/region_mean": 0.002193235166487284, "epoch": 1.9005540974044912, "grad_norm": 0.1851472556591034, "learning_rate": 1e-06, "loss": -0.0123, "step": 814 }, { "clip_ratio/high_max": 0.0036648960958700627, "clip_ratio/high_mean": 0.0013715657114516944, "clip_ratio/low_mean": 0.001557411644171225, "clip_ratio/low_min": 5.7185106925317086e-05, "clip_ratio/region_mean": 0.0029289773665368557, "epoch": 1.9028871391076114, "grad_norm": 0.14763949811458588, "learning_rate": 1e-06, "loss": -0.0126, "step": 815 }, { "clip_ratio/high_max": 0.004410455207107589, "clip_ratio/high_mean": 0.001652118437050376, "clip_ratio/low_mean": 0.00222875757390284, "clip_ratio/low_min": 9.073063301912043e-05, "clip_ratio/region_mean": 0.0038808759272797033, "epoch": 1.905220180810732, "grad_norm": 0.11592372506856918, "learning_rate": 1e-06, "loss": -0.0129, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3504.0, "completions/mean_length": 813.1998291015625, "completions/mean_terminated_length": 610.9419555664062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.9075532225138523, "grad_norm": 0.26833945512771606, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 120477791.0, "reward": 0.520089328289032, "reward_std": 0.17585043609142303, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 817 }, { "clip_ratio/high_max": 0.0029519485178752802, "clip_ratio/high_mean": 0.001182112080641673, "clip_ratio/low_mean": 0.0010820788447745144, "clip_ratio/low_min": 7.51366897020489e-05, "clip_ratio/region_mean": 0.002264190941787092, "epoch": 1.909886264216973, "grad_norm": 0.20074081420898438, "learning_rate": 1e-06, "loss": -0.0118, "step": 818 }, { "clip_ratio/high_max": 0.003937263565603644, "clip_ratio/high_mean": 0.0015665422051824862, "clip_ratio/low_mean": 0.0015585158944304567, "clip_ratio/low_min": 0.0001918823254527524, "clip_ratio/region_mean": 0.0031250580577761866, "epoch": 1.9122193059200934, "grad_norm": 0.1334230750799179, "learning_rate": 1e-06, "loss": -0.0121, "step": 819 }, { "clip_ratio/high_max": 0.004838202992687002, "clip_ratio/high_mean": 0.0018968461008626036, "clip_ratio/low_mean": 0.0021204983695497504, "clip_ratio/low_min": 0.0002480997827660758, "clip_ratio/region_mean": 0.004017344523163047, "epoch": 1.9145523476232138, "grad_norm": 0.1114879623055458, "learning_rate": 1e-06, "loss": -0.0123, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 751.271240234375, "completions/mean_terminated_length": 659.2144165039062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.9168853893263342, "grad_norm": 0.265716016292572, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 121132250.0, "reward": 0.5491071939468384, "reward_std": 0.1677054762840271, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 821 }, { "clip_ratio/high_max": 0.0029034060426056385, "clip_ratio/high_mean": 0.0011174121864314657, "clip_ratio/low_mean": 0.0009803238972381223, "clip_ratio/low_min": 4.627344605978578e-05, "clip_ratio/region_mean": 0.002097736105497461, "epoch": 1.9192184310294547, "grad_norm": 0.18992997705936432, "learning_rate": 1e-06, "loss": -0.0, "step": 822 }, { "clip_ratio/high_max": 0.0036351673988974653, "clip_ratio/high_mean": 0.0014360028762894217, "clip_ratio/low_mean": 0.0014615544569096528, "clip_ratio/low_min": 0.00013460544141707942, "clip_ratio/region_mean": 0.0028975573732168414, "epoch": 1.921551472732575, "grad_norm": 0.12745270133018494, "learning_rate": 1e-06, "loss": -0.0004, "step": 823 }, { "clip_ratio/high_max": 0.0044301141242613085, "clip_ratio/high_mean": 0.001729643699945882, "clip_ratio/low_mean": 0.0019472953608783428, "clip_ratio/low_min": 0.00017395226313965395, "clip_ratio/region_mean": 0.0036769389844266698, "epoch": 1.9238845144356955, "grad_norm": 0.1055830642580986, "learning_rate": 1e-06, "loss": -0.0006, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 801.0089721679688, "completions/mean_terminated_length": 602.1396484375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.926217556138816, "grad_norm": 0.2585517466068268, "learning_rate": 1e-06, "loss": -0.0287, "num_tokens": 121734506.0, "reward": 0.5703125, "reward_std": 0.15695396065711975, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 825 }, { "clip_ratio/high_max": 0.0021760613017249852, "clip_ratio/high_mean": 0.0008149208151735365, "clip_ratio/low_mean": 0.0009635127335059224, "clip_ratio/low_min": 5.545138719753595e-05, "clip_ratio/region_mean": 0.0017784335868782364, "epoch": 1.9285505978419364, "grad_norm": 0.16936899721622467, "learning_rate": 1e-06, "loss": -0.0288, "step": 826 }, { "clip_ratio/high_max": 0.0031448003865079954, "clip_ratio/high_mean": 0.0010849007121578325, "clip_ratio/low_mean": 0.0013563830216298811, "clip_ratio/low_min": 2.5145845938823186e-05, "clip_ratio/region_mean": 0.002441283802909311, "epoch": 1.9308836395450568, "grad_norm": 0.12165207415819168, "learning_rate": 1e-06, "loss": -0.0291, "step": 827 }, { "clip_ratio/high_max": 0.0035627341640065424, "clip_ratio/high_mean": 0.0013078061601845548, "clip_ratio/low_mean": 0.0019288091243652161, "clip_ratio/low_min": 5.6860979384509847e-05, "clip_ratio/region_mean": 0.003236615302739665, "epoch": 1.9332166812481772, "grad_norm": 0.09695543348789215, "learning_rate": 1e-06, "loss": -0.0293, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 818.5156860351562, "completions/mean_terminated_length": 641.1458740234375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 1.935549722951298, "grad_norm": 0.28884321451187134, "learning_rate": 1e-06, "loss": -0.0259, "num_tokens": 122370472.0, "reward": 0.53125, "reward_std": 0.2205638438463211, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 829 }, { "clip_ratio/high_max": 0.003323182543681469, "clip_ratio/high_mean": 0.001469734954298474, "clip_ratio/low_mean": 0.001177801052108407, "clip_ratio/low_min": 6.61262747598812e-05, "clip_ratio/region_mean": 0.0026475359336473048, "epoch": 1.937882764654418, "grad_norm": 0.19217514991760254, "learning_rate": 1e-06, "loss": -0.026, "step": 830 }, { "clip_ratio/high_max": 0.004301931752706878, "clip_ratio/high_mean": 0.0018692515368456952, "clip_ratio/low_mean": 0.0016967855772236362, "clip_ratio/low_min": 0.00014876827845000662, "clip_ratio/region_mean": 0.003566037106793374, "epoch": 1.9402158063575388, "grad_norm": 0.14799891412258148, "learning_rate": 1e-06, "loss": -0.0264, "step": 831 }, { "clip_ratio/high_max": 0.00509976020839531, "clip_ratio/high_mean": 0.0022558999626198784, "clip_ratio/low_mean": 0.002382559687248431, "clip_ratio/low_min": 0.0002279343889313168, "clip_ratio/region_mean": 0.004638459635316394, "epoch": 1.942548848060659, "grad_norm": 0.12829618155956268, "learning_rate": 1e-06, "loss": -0.0266, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 652.3326416015625, "completions/mean_terminated_length": 541.2465209960938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.9448818897637796, "grad_norm": 0.273409903049469, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 122925498.0, "reward": 0.6908482313156128, "reward_std": 0.16101153194904327, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 833 }, { "clip_ratio/high_max": 0.002868306743039284, "clip_ratio/high_mean": 0.00117907220737834, "clip_ratio/low_mean": 0.0007301081259356579, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019091803187620826, "epoch": 1.9472149314668998, "grad_norm": 0.17366155982017517, "learning_rate": 1e-06, "loss": -0.0189, "step": 834 }, { "clip_ratio/high_max": 0.0035869561397703364, "clip_ratio/high_mean": 0.0014796399882470723, "clip_ratio/low_mean": 0.0011686617544910405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026483017645659856, "epoch": 1.9495479731700205, "grad_norm": 0.1268068253993988, "learning_rate": 1e-06, "loss": -0.0192, "step": 835 }, { "clip_ratio/high_max": 0.0045702830830123276, "clip_ratio/high_mean": 0.0018947995558846742, "clip_ratio/low_mean": 0.0015959115371515509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034907111403299496, "epoch": 1.9518810148731407, "grad_norm": 0.10451728105545044, "learning_rate": 1e-06, "loss": -0.0194, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 827.5457763671875, "completions/mean_terminated_length": 584.567138671875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.9542140565762613, "grad_norm": 0.32571104168891907, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 123495651.0, "reward": 0.660714328289032, "reward_std": 0.16642414033412933, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 837 }, { "clip_ratio/high_max": 0.0031645296621718444, "clip_ratio/high_mean": 0.0012686815207416657, "clip_ratio/low_mean": 0.0011152174629387446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023838989800424315, "epoch": 1.9565470982793818, "grad_norm": 0.19464130699634552, "learning_rate": 1e-06, "loss": -0.0173, "step": 838 }, { "clip_ratio/high_max": 0.004461124124645721, "clip_ratio/high_mean": 0.0017314631622866727, "clip_ratio/low_mean": 0.0016994833240460139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003430946533626411, "epoch": 1.9588801399825022, "grad_norm": 0.15106505155563354, "learning_rate": 1e-06, "loss": -0.0176, "step": 839 }, { "clip_ratio/high_max": 0.005133534876222257, "clip_ratio/high_mean": 0.002148425133782439, "clip_ratio/low_mean": 0.002245298557681963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004393723662360571, "epoch": 1.9612131816856226, "grad_norm": 0.11754657328128815, "learning_rate": 1e-06, "loss": -0.0178, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3388.0, "completions/mean_length": 780.2980346679688, "completions/mean_terminated_length": 613.1524047851562, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 1.963546223388743, "grad_norm": 0.23515422642230988, "learning_rate": 1e-06, "loss": -0.022, "num_tokens": 124108590.0, "reward": 0.6272321939468384, "reward_std": 0.16468749940395355, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 841 }, { "clip_ratio/high_max": 0.002631799303344451, "clip_ratio/high_mean": 0.001067387991497526, "clip_ratio/low_mean": 0.0007637361450179014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018311241146875545, "epoch": 1.9658792650918635, "grad_norm": 0.17903964221477509, "learning_rate": 1e-06, "loss": -0.022, "step": 842 }, { "clip_ratio/high_max": 0.0033845004509203136, "clip_ratio/high_mean": 0.0013698315797228133, "clip_ratio/low_mean": 0.001177584979814128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002547416544985026, "epoch": 1.968212306794984, "grad_norm": 0.12591318786144257, "learning_rate": 1e-06, "loss": -0.0223, "step": 843 }, { "clip_ratio/high_max": 0.00416078204580117, "clip_ratio/high_mean": 0.0016859679089975543, "clip_ratio/low_mean": 0.0015593304051435553, "clip_ratio/low_min": 1.0144457519345451e-05, "clip_ratio/region_mean": 0.003245298285037279, "epoch": 1.9705453484981044, "grad_norm": 0.10272466391324997, "learning_rate": 1e-06, "loss": -0.0225, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 798.9721069335938, "completions/mean_terminated_length": 553.8693237304688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.9728783902012248, "grad_norm": 0.2473994344472885, "learning_rate": 1e-06, "loss": -0.0363, "num_tokens": 124655053.0, "reward": 0.6160714626312256, "reward_std": 0.15544581413269043, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 845 }, { "clip_ratio/high_max": 0.0025367951930093113, "clip_ratio/high_mean": 0.0010239180810458492, "clip_ratio/low_mean": 0.0008026299910852686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018265480794070754, "epoch": 1.9752114319043454, "grad_norm": 0.19398920238018036, "learning_rate": 1e-06, "loss": -0.0364, "step": 846 }, { "clip_ratio/high_max": 0.003460346153588034, "clip_ratio/high_mean": 0.0013492602229234762, "clip_ratio/low_mean": 0.0011571871873456985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00250644742482109, "epoch": 1.9775444736074657, "grad_norm": 0.12446160614490509, "learning_rate": 1e-06, "loss": -0.0367, "step": 847 }, { "clip_ratio/high_max": 0.004393142669869121, "clip_ratio/high_mean": 0.0018039105307252612, "clip_ratio/low_mean": 0.001614636512385914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034185470358352177, "epoch": 1.9798775153105863, "grad_norm": 0.10707778483629227, "learning_rate": 1e-06, "loss": -0.0369, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2026.0, "completions/mean_length": 754.9609985351562, "completions/mean_terminated_length": 544.907470703125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.9822105570137065, "grad_norm": 0.2629238963127136, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 125207162.0, "reward": 0.5959821939468384, "reward_std": 0.14356933534145355, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 849 }, { "clip_ratio/high_max": 0.002639403239300009, "clip_ratio/high_mean": 0.001012396363876178, "clip_ratio/low_mean": 0.0009428266130271368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019552229932742193, "epoch": 1.9845435987168272, "grad_norm": 0.1881600320339203, "learning_rate": 1e-06, "loss": -0.017, "step": 850 }, { "clip_ratio/high_max": 0.0034354824747424573, "clip_ratio/high_mean": 0.0012989621027372777, "clip_ratio/low_mean": 0.0013958134695712943, "clip_ratio/low_min": 1.7317815945716575e-05, "clip_ratio/region_mean": 0.002694775590498466, "epoch": 1.9868766404199474, "grad_norm": 0.13478711247444153, "learning_rate": 1e-06, "loss": -0.0173, "step": 851 }, { "clip_ratio/high_max": 0.004115693511266727, "clip_ratio/high_mean": 0.0015259725023497595, "clip_ratio/low_mean": 0.001940916721650865, "clip_ratio/low_min": 6.92712637828663e-05, "clip_ratio/region_mean": 0.003466889203991741, "epoch": 1.989209682123068, "grad_norm": 0.11701318621635437, "learning_rate": 1e-06, "loss": -0.0175, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 782.4017944335938, "completions/mean_terminated_length": 594.8396606445312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.9915427238261882, "grad_norm": 0.29080289602279663, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 125795658.0, "reward": 0.629464328289032, "reward_std": 0.16160815954208374, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 853 }, { "clip_ratio/high_max": 0.003062258881982416, "clip_ratio/high_mean": 0.0012164052168373019, "clip_ratio/low_mean": 0.0009718203127704328, "clip_ratio/low_min": 4.3123158320668153e-05, "clip_ratio/region_mean": 0.002188225509598851, "epoch": 1.993875765529309, "grad_norm": 0.19691500067710876, "learning_rate": 1e-06, "loss": -0.0085, "step": 854 }, { "clip_ratio/high_max": 0.003847715095616877, "clip_ratio/high_mean": 0.001553784080897458, "clip_ratio/low_mean": 0.0015201193273242097, "clip_ratio/low_min": 0.0001092471466108691, "clip_ratio/region_mean": 0.0030739033900317736, "epoch": 1.9962088072324293, "grad_norm": 0.15383592247962952, "learning_rate": 1e-06, "loss": -0.0089, "step": 855 }, { "clip_ratio/high_max": 0.004732859262730926, "clip_ratio/high_mean": 0.001860969507106347, "clip_ratio/low_mean": 0.0021460957141243853, "clip_ratio/low_min": 0.0001534343282401096, "clip_ratio/region_mean": 0.004007065144833177, "epoch": 1.9985418489355498, "grad_norm": 0.1199055165052414, "learning_rate": 1e-06, "loss": -0.0091, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 772.9185791015625, "completions/mean_terminated_length": 572.3538208007812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.0023330417031207, "grad_norm": 0.28605201840400696, "learning_rate": 1e-06, "loss": -0.0138, "num_tokens": 126367785.0, "reward": 0.59375, "reward_std": 0.16709814965724945, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 857 }, { "clip_ratio/high_max": 0.0031575198663631454, "clip_ratio/high_mean": 0.001189718608657131, "clip_ratio/low_mean": 0.000987628862276324, "clip_ratio/low_min": 2.8561235922097694e-05, "clip_ratio/region_mean": 0.0021773475091322325, "epoch": 2.004666083406241, "grad_norm": 0.2252815067768097, "learning_rate": 1e-06, "loss": -0.0139, "step": 858 }, { "clip_ratio/high_max": 0.004039512175950222, "clip_ratio/high_mean": 0.0015659927521483041, "clip_ratio/low_mean": 0.001601550215127645, "clip_ratio/low_min": 1.8541312783781905e-05, "clip_ratio/region_mean": 0.003167543007293716, "epoch": 2.0069991251093615, "grad_norm": 0.14869128167629242, "learning_rate": 1e-06, "loss": -0.0142, "step": 859 }, { "clip_ratio/high_max": 0.004896121463389136, "clip_ratio/high_mean": 0.0018801896148943342, "clip_ratio/low_mean": 0.0020864028883806895, "clip_ratio/low_min": 5.562393562286161e-05, "clip_ratio/region_mean": 0.003966592441429384, "epoch": 2.0093321668124817, "grad_norm": 0.11910010874271393, "learning_rate": 1e-06, "loss": -0.0144, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 787.3359985351562, "completions/mean_terminated_length": 583.484619140625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.0116652085156024, "grad_norm": 0.26455026865005493, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 126949926.0, "reward": 0.5892857313156128, "reward_std": 0.14173558354377747, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 861 }, { "clip_ratio/high_max": 0.003060670438571833, "clip_ratio/high_mean": 0.0010455899428052362, "clip_ratio/low_mean": 0.0008936263429859537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019392163303564303, "epoch": 2.0139982502187226, "grad_norm": 0.18861906230449677, "learning_rate": 1e-06, "loss": -0.0255, "step": 862 }, { "clip_ratio/high_max": 0.004127931737457402, "clip_ratio/high_mean": 0.001437995131709613, "clip_ratio/low_mean": 0.0014083066289458657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002846301707904786, "epoch": 2.0163312919218432, "grad_norm": 0.13935938477516174, "learning_rate": 1e-06, "loss": -0.0258, "step": 863 }, { "clip_ratio/high_max": 0.004721345583675429, "clip_ratio/high_mean": 0.0016240370605373755, "clip_ratio/low_mean": 0.0019194504347979091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035434875317150727, "epoch": 2.0186643336249634, "grad_norm": 0.10832101851701736, "learning_rate": 1e-06, "loss": -0.0261, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 835.9710083007812, "completions/mean_terminated_length": 555.410888671875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.020997375328084, "grad_norm": 0.29960817098617554, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 127501836.0, "reward": 0.5535714626312256, "reward_std": 0.17217236757278442, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 865 }, { "clip_ratio/high_max": 0.0032486176060047, "clip_ratio/high_mean": 0.0013558716782426927, "clip_ratio/low_mean": 0.001163538660875929, "clip_ratio/low_min": 5.150025026523508e-05, "clip_ratio/region_mean": 0.0025194103727699257, "epoch": 2.0233304170312043, "grad_norm": 0.24563723802566528, "learning_rate": 1e-06, "loss": -0.0147, "step": 866 }, { "clip_ratio/high_max": 0.004222351242788136, "clip_ratio/high_mean": 0.0017093753740482498, "clip_ratio/low_mean": 0.0016678377414791612, "clip_ratio/low_min": 0.00011995467502856627, "clip_ratio/region_mean": 0.0033772130918805487, "epoch": 2.025663458734325, "grad_norm": 0.1549946665763855, "learning_rate": 1e-06, "loss": -0.0151, "step": 867 }, { "clip_ratio/high_max": 0.004918584862025455, "clip_ratio/high_mean": 0.0020137444880674593, "clip_ratio/low_mean": 0.00231331121904077, "clip_ratio/low_min": 0.0001807530497899279, "clip_ratio/region_mean": 0.004327055634348653, "epoch": 2.027996500437445, "grad_norm": 0.12319207191467285, "learning_rate": 1e-06, "loss": -0.0153, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 832.9285888671875, "completions/mean_terminated_length": 631.8862915039062, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 2.030329542140566, "grad_norm": 0.3084443211555481, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 128126460.0, "reward": 0.5658482313156128, "reward_std": 0.17626270651817322, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 869 }, { "clip_ratio/high_max": 0.002786785931675695, "clip_ratio/high_mean": 0.0010556178676779382, "clip_ratio/low_mean": 0.0011767580908781383, "clip_ratio/low_min": 6.32032079010969e-05, "clip_ratio/region_mean": 0.0022323759549180977, "epoch": 2.032662583843686, "grad_norm": 0.251633882522583, "learning_rate": 1e-06, "loss": -0.0173, "step": 870 }, { "clip_ratio/high_max": 0.004016864491859451, "clip_ratio/high_mean": 0.0014131901480141096, "clip_ratio/low_mean": 0.0018161660482292064, "clip_ratio/low_min": 0.000126321826428466, "clip_ratio/region_mean": 0.0032293561816914007, "epoch": 2.0349956255468067, "grad_norm": 0.1456253081560135, "learning_rate": 1e-06, "loss": -0.0177, "step": 871 }, { "clip_ratio/high_max": 0.005042114542447962, "clip_ratio/high_mean": 0.0018487909546820447, "clip_ratio/low_mean": 0.0023733653106319252, "clip_ratio/low_min": 0.000206353818612115, "clip_ratio/region_mean": 0.004222156319883652, "epoch": 2.037328667249927, "grad_norm": 0.12213992327451706, "learning_rate": 1e-06, "loss": -0.018, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 768.849365234375, "completions/mean_terminated_length": 512.9146728515625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.0396617089530475, "grad_norm": 0.3045518398284912, "learning_rate": 1e-06, "loss": -0.0142, "num_tokens": 128640549.0, "reward": 0.6741071939468384, "reward_std": 0.1456843614578247, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 873 }, { "clip_ratio/high_max": 0.003249675879487768, "clip_ratio/high_mean": 0.0013188083576096687, "clip_ratio/low_mean": 0.0009610468750906875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002279855245433282, "epoch": 2.041994750656168, "grad_norm": 0.1955275684595108, "learning_rate": 1e-06, "loss": -0.0144, "step": 874 }, { "clip_ratio/high_max": 0.004269578625098802, "clip_ratio/high_mean": 0.0017727342928992584, "clip_ratio/low_mean": 0.0015168888348853216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003289623185992241, "epoch": 2.0443277923592884, "grad_norm": 0.14373070001602173, "learning_rate": 1e-06, "loss": -0.0148, "step": 875 }, { "clip_ratio/high_max": 0.005338603354175575, "clip_ratio/high_mean": 0.00219085920980433, "clip_ratio/low_mean": 0.0021228319601505063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004313691024435684, "epoch": 2.046660834062409, "grad_norm": 0.11054135113954544, "learning_rate": 1e-06, "loss": -0.015, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 878.0881958007812, "completions/mean_terminated_length": 618.0156860351562, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.0489938757655293, "grad_norm": 0.28686681389808655, "learning_rate": 1e-06, "loss": -0.0324, "num_tokens": 129247420.0, "reward": 0.5948660969734192, "reward_std": 0.18956109881401062, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 877 }, { "clip_ratio/high_max": 0.0028610713270609267, "clip_ratio/high_mean": 0.0011752140962926205, "clip_ratio/low_mean": 0.0011545994893822353, "clip_ratio/low_min": 1.451463049306767e-05, "clip_ratio/region_mean": 0.0023298136075027287, "epoch": 2.05132691746865, "grad_norm": 0.2065815031528473, "learning_rate": 1e-06, "loss": -0.0325, "step": 878 }, { "clip_ratio/high_max": 0.003802502091275528, "clip_ratio/high_mean": 0.0015552722579741385, "clip_ratio/low_mean": 0.001864749297965318, "clip_ratio/low_min": 4.3543892388697714e-05, "clip_ratio/region_mean": 0.0034200215013697743, "epoch": 2.05365995917177, "grad_norm": 0.15660862624645233, "learning_rate": 1e-06, "loss": -0.033, "step": 879 }, { "clip_ratio/high_max": 0.004925638000713661, "clip_ratio/high_mean": 0.0019519068155204877, "clip_ratio/low_mean": 0.0024079400609480217, "clip_ratio/low_min": 4.3543892388697714e-05, "clip_ratio/region_mean": 0.00435984693467617, "epoch": 2.055993000874891, "grad_norm": 0.1228170096874237, "learning_rate": 1e-06, "loss": -0.0332, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 762.5569458007812, "completions/mean_terminated_length": 578.02001953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.058326042578011, "grad_norm": 0.2670280337333679, "learning_rate": 1e-06, "loss": -0.0139, "num_tokens": 129826071.0, "reward": 0.6361607313156128, "reward_std": 0.12907657027244568, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 881 }, { "clip_ratio/high_max": 0.0030603227205574512, "clip_ratio/high_mean": 0.0009596358122507809, "clip_ratio/low_mean": 0.0007273225855897181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001686958406935446, "epoch": 2.0606590842811316, "grad_norm": 0.17066572606563568, "learning_rate": 1e-06, "loss": -0.014, "step": 882 }, { "clip_ratio/high_max": 0.004225489275995642, "clip_ratio/high_mean": 0.0013066957144474145, "clip_ratio/low_mean": 0.0011311813832435291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024378771413466893, "epoch": 2.062992125984252, "grad_norm": 0.14567777514457703, "learning_rate": 1e-06, "loss": -0.0142, "step": 883 }, { "clip_ratio/high_max": 0.00489855252817506, "clip_ratio/high_mean": 0.0015477416964131407, "clip_ratio/low_mean": 0.0014860482224321458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030337899224832654, "epoch": 2.0653251676873725, "grad_norm": 0.10635584592819214, "learning_rate": 1e-06, "loss": -0.0144, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 728.4855346679688, "completions/mean_terminated_length": 607.7999877929688, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 2.0676582093904927, "grad_norm": 0.30889102816581726, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 130443466.0, "reward": 0.5915178656578064, "reward_std": 0.19032247364521027, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 885 }, { "clip_ratio/high_max": 0.002870988319045864, "clip_ratio/high_mean": 0.0012390595838951413, "clip_ratio/low_mean": 0.0011685794670484029, "clip_ratio/low_min": 5.4791222282801755e-05, "clip_ratio/region_mean": 0.0024076390036498196, "epoch": 2.0699912510936134, "grad_norm": 0.21724823117256165, "learning_rate": 1e-06, "loss": -0.0061, "step": 886 }, { "clip_ratio/high_max": 0.003807486231380608, "clip_ratio/high_mean": 0.0016763579988037236, "clip_ratio/low_mean": 0.0018138432533305604, "clip_ratio/low_min": 6.759728785254993e-05, "clip_ratio/region_mean": 0.0034902012266684324, "epoch": 2.0723242927967336, "grad_norm": 0.15129409730434418, "learning_rate": 1e-06, "loss": -0.0065, "step": 887 }, { "clip_ratio/high_max": 0.004898910454357974, "clip_ratio/high_mean": 0.002103906146658119, "clip_ratio/low_mean": 0.0024689877609489486, "clip_ratio/low_min": 0.00012384025831124745, "clip_ratio/region_mean": 0.0045728939439868554, "epoch": 2.0746573344998542, "grad_norm": 0.1217551901936531, "learning_rate": 1e-06, "loss": -0.0068, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 784.9832763671875, "completions/mean_terminated_length": 543.1005859375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.0769903762029744, "grad_norm": 0.3328976035118103, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 130998075.0, "reward": 0.6506696939468384, "reward_std": 0.17911705374717712, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 889 }, { "clip_ratio/high_max": 0.0033736916157067753, "clip_ratio/high_mean": 0.0013411463114607614, "clip_ratio/low_mean": 0.0013328426648513414, "clip_ratio/low_min": 0.00013817741819366347, "clip_ratio/region_mean": 0.002673988987226039, "epoch": 2.079323417906095, "grad_norm": 0.22596056759357452, "learning_rate": 1e-06, "loss": -0.0415, "step": 890 }, { "clip_ratio/high_max": 0.004473264423722867, "clip_ratio/high_mean": 0.0017621436963963788, "clip_ratio/low_mean": 0.001972942831343971, "clip_ratio/low_min": 0.0003467309143161401, "clip_ratio/region_mean": 0.003735086676897481, "epoch": 2.0816564596092153, "grad_norm": 0.17085574567317963, "learning_rate": 1e-06, "loss": -0.0419, "step": 891 }, { "clip_ratio/high_max": 0.005657801091729198, "clip_ratio/high_mean": 0.002279354666825384, "clip_ratio/low_mean": 0.0026116365697816946, "clip_ratio/low_min": 0.00047803486449993216, "clip_ratio/region_mean": 0.0048909911783994175, "epoch": 2.083989501312336, "grad_norm": 0.15900585055351257, "learning_rate": 1e-06, "loss": -0.0422, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 841.435302734375, "completions/mean_terminated_length": 574.1521606445312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.0863225430154566, "grad_norm": 0.3205788731575012, "learning_rate": 1e-06, "loss": -0.0334, "num_tokens": 131559177.0, "reward": 0.6618303656578064, "reward_std": 0.16578546166419983, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 893 }, { "clip_ratio/high_max": 0.0033107897615991533, "clip_ratio/high_mean": 0.0013483592138072709, "clip_ratio/low_mean": 0.0011272444835412898, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024756036946200766, "epoch": 2.088655584718577, "grad_norm": 0.24384662508964539, "learning_rate": 1e-06, "loss": -0.0336, "step": 894 }, { "clip_ratio/high_max": 0.004515480119152926, "clip_ratio/high_mean": 0.0017696472714305855, "clip_ratio/low_mean": 0.0017157474558189278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003485394685412757, "epoch": 2.0909886264216975, "grad_norm": 0.1562778502702713, "learning_rate": 1e-06, "loss": -0.034, "step": 895 }, { "clip_ratio/high_max": 0.0059018700558226556, "clip_ratio/high_mean": 0.0022835506133560557, "clip_ratio/low_mean": 0.002354396798182279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004637947422452271, "epoch": 2.0933216681248177, "grad_norm": 0.12244094163179398, "learning_rate": 1e-06, "loss": -0.0343, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 870.2344360351562, "completions/mean_terminated_length": 613.7277221679688, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 2.0956547098279383, "grad_norm": 0.2818109095096588, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 132166979.0, "reward": 0.5703125, "reward_std": 0.16961409151554108, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 897 }, { "clip_ratio/high_max": 0.0034449845261406153, "clip_ratio/high_mean": 0.0011728268946171738, "clip_ratio/low_mean": 0.0012086725982953794, "clip_ratio/low_min": 1.0941877008008305e-05, "clip_ratio/region_mean": 0.0023814995220163837, "epoch": 2.0979877515310585, "grad_norm": 0.20169880986213684, "learning_rate": 1e-06, "loss": -0.0118, "step": 898 }, { "clip_ratio/high_max": 0.004253324601450004, "clip_ratio/high_mean": 0.0015232669829856604, "clip_ratio/low_mean": 0.0016716274694772437, "clip_ratio/low_min": 2.188375401601661e-05, "clip_ratio/region_mean": 0.003194894437910989, "epoch": 2.100320793234179, "grad_norm": 0.14429666101932526, "learning_rate": 1e-06, "loss": -0.0121, "step": 899 }, { "clip_ratio/high_max": 0.005247311113635078, "clip_ratio/high_mean": 0.0018737691698333947, "clip_ratio/low_mean": 0.002305565489223227, "clip_ratio/low_min": 4.376750803203322e-05, "clip_ratio/region_mean": 0.004179334777290933, "epoch": 2.1026538349372994, "grad_norm": 0.11282812803983688, "learning_rate": 1e-06, "loss": -0.0124, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2933.0, "completions/mean_length": 799.9308471679688, "completions/mean_terminated_length": 592.7046508789062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 2.10498687664042, "grad_norm": 0.2659655213356018, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 132758477.0, "reward": 0.6149553656578064, "reward_std": 0.14267736673355103, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 901 }, { "clip_ratio/high_max": 0.002925620494352188, "clip_ratio/high_mean": 0.0011229197443753947, "clip_ratio/low_mean": 0.0009418803820153698, "clip_ratio/low_min": 3.538866440067068e-05, "clip_ratio/region_mean": 0.002064800151856616, "epoch": 2.1073199183435403, "grad_norm": 0.2527206838130951, "learning_rate": 1e-06, "loss": -0.0182, "step": 902 }, { "clip_ratio/high_max": 0.003359719441505149, "clip_ratio/high_mean": 0.001359813524686615, "clip_ratio/low_mean": 0.001348612189758569, "clip_ratio/low_min": 6.528343692480121e-05, "clip_ratio/region_mean": 0.0027084256435045972, "epoch": 2.109652960046661, "grad_norm": 0.15194401144981384, "learning_rate": 1e-06, "loss": -0.0185, "step": 903 }, { "clip_ratio/high_max": 0.004092343900993001, "clip_ratio/high_mean": 0.0016419109197158832, "clip_ratio/low_mean": 0.0018668973170861136, "clip_ratio/low_min": 7.524835928052198e-05, "clip_ratio/region_mean": 0.0035088082076981664, "epoch": 2.111986001749781, "grad_norm": 0.11760832369327545, "learning_rate": 1e-06, "loss": -0.0187, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 740.3460083007812, "completions/mean_terminated_length": 571.1864013671875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.114319043452902, "grad_norm": 0.34094762802124023, "learning_rate": 1e-06, "loss": -0.0135, "num_tokens": 133328451.0, "reward": 0.676339328289032, "reward_std": 0.19846998155117035, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 905 }, { "clip_ratio/high_max": 0.0033262096767430194, "clip_ratio/high_mean": 0.0013361083110794425, "clip_ratio/low_mean": 0.0011833548560389318, "clip_ratio/low_min": 5.430216424429091e-05, "clip_ratio/region_mean": 0.0025194631452905014, "epoch": 2.116652085156022, "grad_norm": 0.2153509110212326, "learning_rate": 1e-06, "loss": -0.0136, "step": 906 }, { "clip_ratio/high_max": 0.004225567885441706, "clip_ratio/high_mean": 0.0018209086592833046, "clip_ratio/low_mean": 0.0018921996670542285, "clip_ratio/low_min": 0.00010104572720592842, "clip_ratio/region_mean": 0.003713108439114876, "epoch": 2.1189851268591426, "grad_norm": 0.16036805510520935, "learning_rate": 1e-06, "loss": -0.014, "step": 907 }, { "clip_ratio/high_max": 0.005712050027796067, "clip_ratio/high_mean": 0.002256985506392084, "clip_ratio/low_mean": 0.002564040602010209, "clip_ratio/low_min": 0.0001545208433526568, "clip_ratio/region_mean": 0.004821026042918675, "epoch": 2.121318168562263, "grad_norm": 0.1366255283355713, "learning_rate": 1e-06, "loss": -0.0143, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3405.0, "completions/mean_length": 812.8426513671875, "completions/mean_terminated_length": 602.2838745117188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.1236512102653835, "grad_norm": 0.2769761383533478, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 133923982.0, "reward": 0.6428571939468384, "reward_std": 0.15887397527694702, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 909 }, { "clip_ratio/high_max": 0.0030629160974058323, "clip_ratio/high_mean": 0.0010740306752268225, "clip_ratio/low_mean": 0.0008721903323021252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019462210257188417, "epoch": 2.1259842519685037, "grad_norm": 0.22105692327022552, "learning_rate": 1e-06, "loss": -0.0187, "step": 910 }, { "clip_ratio/high_max": 0.0039203230844577774, "clip_ratio/high_mean": 0.0014375436294358224, "clip_ratio/low_mean": 0.0013131127998349257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027506563856150024, "epoch": 2.1283172936716244, "grad_norm": 0.15206480026245117, "learning_rate": 1e-06, "loss": -0.019, "step": 911 }, { "clip_ratio/high_max": 0.00493301784445066, "clip_ratio/high_mean": 0.0018046515324385837, "clip_ratio/low_mean": 0.0017769030564522836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003581554614356719, "epoch": 2.130650335374745, "grad_norm": 0.11555609852075577, "learning_rate": 1e-06, "loss": -0.0192, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 877.0335083007812, "completions/mean_terminated_length": 633.5822143554688, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 2.1329833770778652, "grad_norm": 0.25124591588974, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 134546692.0, "reward": 0.5758928656578064, "reward_std": 0.15785479545593262, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 913 }, { "clip_ratio/high_max": 0.002544836745073553, "clip_ratio/high_mean": 0.0009047786570590688, "clip_ratio/low_mean": 0.0009317242675024318, "clip_ratio/low_min": 3.4293552744202316e-05, "clip_ratio/region_mean": 0.0018365029682172462, "epoch": 2.135316418780986, "grad_norm": 0.16623066365718842, "learning_rate": 1e-06, "loss": -0.0173, "step": 914 }, { "clip_ratio/high_max": 0.0033260134296142496, "clip_ratio/high_mean": 0.0011476344006950967, "clip_ratio/low_mean": 0.001382508398819482, "clip_ratio/low_min": 4.895816891803406e-05, "clip_ratio/region_mean": 0.0025301427667727694, "epoch": 2.137649460484106, "grad_norm": 0.1292862892150879, "learning_rate": 1e-06, "loss": -0.0175, "step": 915 }, { "clip_ratio/high_max": 0.0040529364705435, "clip_ratio/high_mean": 0.0013921132704126649, "clip_ratio/low_mean": 0.0019208991070627235, "clip_ratio/low_min": 8.573388186050579e-05, "clip_ratio/region_mean": 0.003313012421131134, "epoch": 2.1399825021872267, "grad_norm": 0.10118012130260468, "learning_rate": 1e-06, "loss": -0.0177, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 730.6908569335938, "completions/mean_terminated_length": 552.7367553710938, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 2.142315543890347, "grad_norm": 0.29722341895103455, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 135102663.0, "reward": 0.6774553656578064, "reward_std": 0.16935335099697113, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 917 }, { "clip_ratio/high_max": 0.0025260009424528107, "clip_ratio/high_mean": 0.0012485976585594472, "clip_ratio/low_mean": 0.000993276385088393, "clip_ratio/low_min": 3.726893191924319e-05, "clip_ratio/region_mean": 0.0022418740481953137, "epoch": 2.1446485855934676, "grad_norm": 0.20507359504699707, "learning_rate": 1e-06, "loss": -0.0263, "step": 918 }, { "clip_ratio/high_max": 0.0037213098985375836, "clip_ratio/high_mean": 0.0016931834470597096, "clip_ratio/low_mean": 0.001416172579411068, "clip_ratio/low_min": 4.969190922565758e-05, "clip_ratio/region_mean": 0.0031093560974113643, "epoch": 2.146981627296588, "grad_norm": 0.1391584873199463, "learning_rate": 1e-06, "loss": -0.0267, "step": 919 }, { "clip_ratio/high_max": 0.0037900824390817434, "clip_ratio/high_mean": 0.0019224171919631772, "clip_ratio/low_mean": 0.0019036271805816796, "clip_ratio/low_min": 3.726893191924319e-05, "clip_ratio/region_mean": 0.003826044288871344, "epoch": 2.1493146689997085, "grad_norm": 0.11054936796426773, "learning_rate": 1e-06, "loss": -0.0269, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2406.0, "completions/mean_length": 781.068115234375, "completions/mean_terminated_length": 560.0726318359375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.1516477107028287, "grad_norm": 0.29332903027534485, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 135662860.0, "reward": 0.5948660969734192, "reward_std": 0.16337619721889496, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 921 }, { "clip_ratio/high_max": 0.003122845031612087, "clip_ratio/high_mean": 0.0011568565605557524, "clip_ratio/low_mean": 0.0010460384328325745, "clip_ratio/low_min": 1.4410883522941731e-05, "clip_ratio/region_mean": 0.002202895069785882, "epoch": 2.1539807524059493, "grad_norm": 0.20762155950069427, "learning_rate": 1e-06, "loss": -0.0351, "step": 922 }, { "clip_ratio/high_max": 0.004573675978463143, "clip_ratio/high_mean": 0.0016039726433518808, "clip_ratio/low_mean": 0.0016370986922993325, "clip_ratio/low_min": 2.8821767045883462e-05, "clip_ratio/region_mean": 0.0032410714047728106, "epoch": 2.1563137941090695, "grad_norm": 0.16340962052345276, "learning_rate": 1e-06, "loss": -0.0354, "step": 923 }, { "clip_ratio/high_max": 0.005298002273775637, "clip_ratio/high_mean": 0.0019376581731194165, "clip_ratio/low_mean": 0.002164386365620885, "clip_ratio/low_min": 2.8821767045883462e-05, "clip_ratio/region_mean": 0.004102044636965729, "epoch": 2.15864683581219, "grad_norm": 0.12749722599983215, "learning_rate": 1e-06, "loss": -0.0357, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3478.0, "completions/mean_length": 931.7891235351562, "completions/mean_terminated_length": 613.03564453125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.1609798775153104, "grad_norm": 0.2773822247982025, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 136252247.0, "reward": 0.5535714626312256, "reward_std": 0.16859450936317444, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 925 }, { "clip_ratio/high_max": 0.002595619880594313, "clip_ratio/high_mean": 0.0010906462084676605, "clip_ratio/low_mean": 0.0009678277510829503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020584739540936425, "epoch": 2.163312919218431, "grad_norm": 0.18303123116493225, "learning_rate": 1e-06, "loss": -0.0339, "step": 926 }, { "clip_ratio/high_max": 0.0034668194130063057, "clip_ratio/high_mean": 0.0013711351821257267, "clip_ratio/low_mean": 0.001509971540144761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002881106724089477, "epoch": 2.1656459609215517, "grad_norm": 0.1436970978975296, "learning_rate": 1e-06, "loss": -0.0343, "step": 927 }, { "clip_ratio/high_max": 0.004512841063842643, "clip_ratio/high_mean": 0.0017775731685105711, "clip_ratio/low_mean": 0.0019714374757313635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037490107642952353, "epoch": 2.167979002624672, "grad_norm": 0.1098676547408104, "learning_rate": 1e-06, "loss": -0.0345, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 799.6986694335938, "completions/mean_terminated_length": 613.1156005859375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.1703120443277926, "grad_norm": 0.2608231008052826, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 136863433.0, "reward": 0.582589328289032, "reward_std": 0.16171692311763763, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 929 }, { "clip_ratio/high_max": 0.002594792254967615, "clip_ratio/high_mean": 0.001090283973098849, "clip_ratio/low_mean": 0.0009110010778385913, "clip_ratio/low_min": 1.768033871485386e-05, "clip_ratio/region_mean": 0.0020012850291095674, "epoch": 2.1726450860309128, "grad_norm": 0.18556411564350128, "learning_rate": 1e-06, "loss": -0.007, "step": 930 }, { "clip_ratio/high_max": 0.0032463525640196167, "clip_ratio/high_mean": 0.0013480858979164623, "clip_ratio/low_mean": 0.0013022629736951785, "clip_ratio/low_min": 3.536067742970772e-05, "clip_ratio/region_mean": 0.002650348840688821, "epoch": 2.1749781277340334, "grad_norm": 0.13753841817378998, "learning_rate": 1e-06, "loss": -0.0073, "step": 931 }, { "clip_ratio/high_max": 0.004069790324138012, "clip_ratio/high_mean": 0.0016946601353993174, "clip_ratio/low_mean": 0.0018098091786669102, "clip_ratio/low_min": 6.92520770826377e-05, "clip_ratio/region_mean": 0.0035044692776864395, "epoch": 2.1773111694371536, "grad_norm": 0.10956626385450363, "learning_rate": 1e-06, "loss": -0.0075, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 842.2924194335938, "completions/mean_terminated_length": 575.0797119140625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.1796442111402743, "grad_norm": 0.3029051423072815, "learning_rate": 1e-06, "loss": -0.0149, "num_tokens": 137446863.0, "reward": 0.527901828289032, "reward_std": 0.17254145443439484, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 933 }, { "clip_ratio/high_max": 0.0032030338115873747, "clip_ratio/high_mean": 0.0013054558330622967, "clip_ratio/low_mean": 0.0011362493805791019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024417051681666635, "epoch": 2.1819772528433945, "grad_norm": 0.18715184926986694, "learning_rate": 1e-06, "loss": -0.015, "step": 934 }, { "clip_ratio/high_max": 0.004060306790051982, "clip_ratio/high_mean": 0.0016897071836865507, "clip_ratio/low_mean": 0.0016584113764110953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003348118480062112, "epoch": 2.184310294546515, "grad_norm": 0.14660093188285828, "learning_rate": 1e-06, "loss": -0.0154, "step": 935 }, { "clip_ratio/high_max": 0.004762679047416896, "clip_ratio/high_mean": 0.0019417361691012047, "clip_ratio/low_mean": 0.002219055091700284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004160791329923086, "epoch": 2.1866433362496354, "grad_norm": 0.11722005903720856, "learning_rate": 1e-06, "loss": -0.0156, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 690.4799194335938, "completions/mean_terminated_length": 543.7927856445312, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.188976377952756, "grad_norm": 0.3265189528465271, "learning_rate": 1e-06, "loss": -0.0129, "num_tokens": 138001277.0, "reward": 0.6227678656578064, "reward_std": 0.19448630511760712, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644601345062, "step": 937 }, { "clip_ratio/high_max": 0.0027380154278944246, "clip_ratio/high_mean": 0.0012599122783285566, "clip_ratio/low_mean": 0.0012074826190655585, "clip_ratio/low_min": 1.6706762835383415e-05, "clip_ratio/region_mean": 0.0024673948937561363, "epoch": 2.1913094196558762, "grad_norm": 0.23600614070892334, "learning_rate": 1e-06, "loss": -0.013, "step": 938 }, { "clip_ratio/high_max": 0.0036378396180225536, "clip_ratio/high_mean": 0.0016853785200510174, "clip_ratio/low_mean": 0.001895563822472468, "clip_ratio/low_min": 7.797878788551316e-05, "clip_ratio/region_mean": 0.0035809422697639093, "epoch": 2.193642461358997, "grad_norm": 0.14487050473690033, "learning_rate": 1e-06, "loss": -0.0135, "step": 939 }, { "clip_ratio/high_max": 0.004585127855534665, "clip_ratio/high_mean": 0.002057483812677674, "clip_ratio/low_mean": 0.0025355320030939765, "clip_ratio/low_min": 0.00010397171718068421, "clip_ratio/region_mean": 0.0045930157211842015, "epoch": 2.195975503062117, "grad_norm": 0.14355456829071045, "learning_rate": 1e-06, "loss": -0.0137, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2110.0, "completions/mean_length": 749.7902221679688, "completions/mean_terminated_length": 547.82958984375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.1983085447652377, "grad_norm": 0.33057332038879395, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 138553625.0, "reward": 0.6462053656578064, "reward_std": 0.1510535031557083, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 941 }, { "clip_ratio/high_max": 0.003345898090628907, "clip_ratio/high_mean": 0.0012596784290508367, "clip_ratio/low_mean": 0.0010325881594326347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002292266617587302, "epoch": 2.200641586468358, "grad_norm": 0.20659570395946503, "learning_rate": 1e-06, "loss": -0.0225, "step": 942 }, { "clip_ratio/high_max": 0.004394995921757072, "clip_ratio/high_mean": 0.0016778078497736715, "clip_ratio/low_mean": 0.0016187332694244105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032965410500764847, "epoch": 2.2029746281714786, "grad_norm": 0.16463735699653625, "learning_rate": 1e-06, "loss": -0.0228, "step": 943 }, { "clip_ratio/high_max": 0.005395822518039495, "clip_ratio/high_mean": 0.0021049265196779743, "clip_ratio/low_mean": 0.0021176774680498056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004222604096867144, "epoch": 2.205307669874599, "grad_norm": 0.12173808366060257, "learning_rate": 1e-06, "loss": -0.0231, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 791.1116333007812, "completions/mean_terminated_length": 583.3309936523438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.2076407115777195, "grad_norm": 0.31200137734413147, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 139128781.0, "reward": 0.5569196939468384, "reward_std": 0.1902044266462326, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 945 }, { "clip_ratio/high_max": 0.002844688940967899, "clip_ratio/high_mean": 0.0011944679681619164, "clip_ratio/low_mean": 0.0012930356933793519, "clip_ratio/low_min": 0.00015965670263540233, "clip_ratio/region_mean": 0.002487503683369141, "epoch": 2.20997375328084, "grad_norm": 0.21631820499897003, "learning_rate": 1e-06, "loss": -0.0093, "step": 946 }, { "clip_ratio/high_max": 0.003735170335858129, "clip_ratio/high_mean": 0.0015952659414324444, "clip_ratio/low_mean": 0.001962183123396244, "clip_ratio/low_min": 0.0002721028049563756, "clip_ratio/region_mean": 0.0035574489884311333, "epoch": 2.2123067949839603, "grad_norm": 0.15403150022029877, "learning_rate": 1e-06, "loss": -0.0098, "step": 947 }, { "clip_ratio/high_max": 0.004715195842436515, "clip_ratio/high_mean": 0.0019732481305254623, "clip_ratio/low_mean": 0.002724868718360085, "clip_ratio/low_min": 0.0003985227485827636, "clip_ratio/region_mean": 0.004698116856161505, "epoch": 2.214639836687081, "grad_norm": 0.11805091053247452, "learning_rate": 1e-06, "loss": -0.01, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 740.8705444335938, "completions/mean_terminated_length": 559.298828125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.216972878390201, "grad_norm": 0.29770275950431824, "learning_rate": 1e-06, "loss": -0.0151, "num_tokens": 139685921.0, "reward": 0.582589328289032, "reward_std": 0.16044700145721436, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 949 }, { "clip_ratio/high_max": 0.0029555918372352608, "clip_ratio/high_mean": 0.0010729042642196873, "clip_ratio/low_mean": 0.0010883038357860642, "clip_ratio/low_min": 5.277947639115155e-05, "clip_ratio/region_mean": 0.0021612080454360694, "epoch": 2.219305920093322, "grad_norm": 0.1952330768108368, "learning_rate": 1e-06, "loss": -0.0152, "step": 950 }, { "clip_ratio/high_max": 0.003866097016725689, "clip_ratio/high_mean": 0.0014254486886784434, "clip_ratio/low_mean": 0.001642579676627065, "clip_ratio/low_min": 0.0001828239328460768, "clip_ratio/region_mean": 0.0030680283889523707, "epoch": 2.221638961796442, "grad_norm": 0.1441340446472168, "learning_rate": 1e-06, "loss": -0.0156, "step": 951 }, { "clip_ratio/high_max": 0.005018486961489543, "clip_ratio/high_mean": 0.001827264186431421, "clip_ratio/low_mean": 0.002179575387344812, "clip_ratio/low_min": 0.00016269919069600292, "clip_ratio/region_mean": 0.004006839633802883, "epoch": 2.2239720034995627, "grad_norm": 0.11001552641391754, "learning_rate": 1e-06, "loss": -0.0158, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 737.5245971679688, "completions/mean_terminated_length": 530.6043090820312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 2.226305045202683, "grad_norm": 0.31458839774131775, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 140224943.0, "reward": 0.625, "reward_std": 0.1573241800069809, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 953 }, { "clip_ratio/high_max": 0.0033977579296333715, "clip_ratio/high_mean": 0.0012466823318391107, "clip_ratio/low_mean": 0.0010291918297298253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002275874176120851, "epoch": 2.2286380869058036, "grad_norm": 0.18774908781051636, "learning_rate": 1e-06, "loss": -0.01, "step": 954 }, { "clip_ratio/high_max": 0.004191605839878321, "clip_ratio/high_mean": 0.001540441720862873, "clip_ratio/low_mean": 0.0016601760835328605, "clip_ratio/low_min": 5.8085501223104075e-05, "clip_ratio/region_mean": 0.003200617866241373, "epoch": 2.2309711286089238, "grad_norm": 0.1363900899887085, "learning_rate": 1e-06, "loss": -0.0103, "step": 955 }, { "clip_ratio/high_max": 0.0052029774960828945, "clip_ratio/high_mean": 0.0019172112879459746, "clip_ratio/low_mean": 0.00215262397978222, "clip_ratio/low_min": 4.356412682682276e-05, "clip_ratio/region_mean": 0.004069835384143516, "epoch": 2.2333041703120444, "grad_norm": 0.11612249910831451, "learning_rate": 1e-06, "loss": -0.0105, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1940.0, "completions/mean_length": 845.9531860351562, "completions/mean_terminated_length": 527.3211059570312, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 2.2356372120151646, "grad_norm": 0.28786343336105347, "learning_rate": 1e-06, "loss": -0.0291, "num_tokens": 140730381.0, "reward": 0.6473214626312256, "reward_std": 0.13842590153217316, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 957 }, { "clip_ratio/high_max": 0.0027383992783143185, "clip_ratio/high_mean": 0.0011324655079079093, "clip_ratio/low_mean": 0.0007374929173238343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018699584143178072, "epoch": 2.2379702537182853, "grad_norm": 0.510836660861969, "learning_rate": 1e-06, "loss": -0.0292, "step": 958 }, { "clip_ratio/high_max": 0.0033866656813188456, "clip_ratio/high_mean": 0.0014357821382873226, "clip_ratio/low_mean": 0.0011694651147990953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026052472021547146, "epoch": 2.2403032954214055, "grad_norm": 0.12284267693758011, "learning_rate": 1e-06, "loss": -0.0295, "step": 959 }, { "clip_ratio/high_max": 0.00402570631558774, "clip_ratio/high_mean": 0.0017033829935826361, "clip_ratio/low_mean": 0.00156416988465935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032675528855179437, "epoch": 2.242636337124526, "grad_norm": 0.11234059929847717, "learning_rate": 1e-06, "loss": -0.0297, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 825.0424194335938, "completions/mean_terminated_length": 573.4302978515625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.2449693788276464, "grad_norm": 0.32868584990501404, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 141295955.0, "reward": 0.6116071939468384, "reward_std": 0.1709267795085907, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 961 }, { "clip_ratio/high_max": 0.003223244442779105, "clip_ratio/high_mean": 0.0012568392703542486, "clip_ratio/low_mean": 0.0010169135575779364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022737527833669446, "epoch": 2.247302420530767, "grad_norm": 0.1923956274986267, "learning_rate": 1e-06, "loss": -0.0219, "step": 962 }, { "clip_ratio/high_max": 0.004548781056655571, "clip_ratio/high_mean": 0.0017512694284960162, "clip_ratio/low_mean": 0.001488579702709103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032398491675849073, "epoch": 2.249635462233887, "grad_norm": 0.14168216288089752, "learning_rate": 1e-06, "loss": -0.0222, "step": 963 }, { "clip_ratio/high_max": 0.005628042708849534, "clip_ratio/high_mean": 0.002081253900541924, "clip_ratio/low_mean": 0.0020753545686602592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004156608440098353, "epoch": 2.251968503937008, "grad_norm": 0.10867574065923691, "learning_rate": 1e-06, "loss": -0.0225, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 788.8471069335938, "completions/mean_terminated_length": 547.2467041015625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.2543015456401285, "grad_norm": 0.2968079149723053, "learning_rate": 1e-06, "loss": -0.0475, "num_tokens": 141832874.0, "reward": 0.6752232313156128, "reward_std": 0.15804095566272736, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 965 }, { "clip_ratio/high_max": 0.0039728134870529175, "clip_ratio/high_mean": 0.0013761714253632817, "clip_ratio/low_mean": 0.0010281682798449765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00240433968428988, "epoch": 2.2566345873432487, "grad_norm": 0.18438033759593964, "learning_rate": 1e-06, "loss": -0.0476, "step": 966 }, { "clip_ratio/high_max": 0.005219698956352659, "clip_ratio/high_mean": 0.0017833866659202613, "clip_ratio/low_mean": 0.0014420898569369456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003225476495572366, "epoch": 2.2589676290463694, "grad_norm": 0.14730094373226166, "learning_rate": 1e-06, "loss": -0.048, "step": 967 }, { "clip_ratio/high_max": 0.006568110169610009, "clip_ratio/high_mean": 0.002230201949714683, "clip_ratio/low_mean": 0.0020205867167533142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0042507885955274105, "epoch": 2.2613006707494896, "grad_norm": 0.10803677141666412, "learning_rate": 1e-06, "loss": -0.0482, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 812.911865234375, "completions/mean_terminated_length": 538.9903564453125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.2636337124526102, "grad_norm": 0.2679833173751831, "learning_rate": 1e-06, "loss": -0.0363, "num_tokens": 142364275.0, "reward": 0.5970982313156128, "reward_std": 0.17325754463672638, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 969 }, { "clip_ratio/high_max": 0.0031274915309040807, "clip_ratio/high_mean": 0.0011996782159258146, "clip_ratio/low_mean": 0.0009189200136461295, "clip_ratio/low_min": 5.9732385125244036e-05, "clip_ratio/region_mean": 0.00211859821138205, "epoch": 2.2659667541557305, "grad_norm": 0.2087981253862381, "learning_rate": 1e-06, "loss": -0.0365, "step": 970 }, { "clip_ratio/high_max": 0.004322994675021619, "clip_ratio/high_mean": 0.001675694264122285, "clip_ratio/low_mean": 0.0014205201896402286, "clip_ratio/low_min": 7.213071876321919e-05, "clip_ratio/region_mean": 0.003096214473771397, "epoch": 2.268299795858851, "grad_norm": 0.14278952777385712, "learning_rate": 1e-06, "loss": -0.0368, "step": 971 }, { "clip_ratio/high_max": 0.0053252124635037035, "clip_ratio/high_mean": 0.0020474727207329124, "clip_ratio/low_mean": 0.001973900591110578, "clip_ratio/low_min": 0.0001594700152054429, "clip_ratio/region_mean": 0.00402137337368913, "epoch": 2.2706328375619713, "grad_norm": 0.11389455199241638, "learning_rate": 1e-06, "loss": -0.0371, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2191.0, "completions/mean_length": 747.6529541015625, "completions/mean_terminated_length": 490.0877685546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 2.272965879265092, "grad_norm": 0.3091283440589905, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 142849092.0, "reward": 0.6383928656578064, "reward_std": 0.13421748578548431, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 973 }, { "clip_ratio/high_max": 0.003084462870901916, "clip_ratio/high_mean": 0.0011609100438363384, "clip_ratio/low_mean": 0.0010371861535531934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002198096161009744, "epoch": 2.275298920968212, "grad_norm": 0.19639158248901367, "learning_rate": 1e-06, "loss": -0.0113, "step": 974 }, { "clip_ratio/high_max": 0.004008746203908231, "clip_ratio/high_mean": 0.0015445096537405334, "clip_ratio/low_mean": 0.001601144578671665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003145654329273384, "epoch": 2.277631962671333, "grad_norm": 0.13785851001739502, "learning_rate": 1e-06, "loss": -0.0116, "step": 975 }, { "clip_ratio/high_max": 0.004972171511326451, "clip_ratio/high_mean": 0.001914288586249313, "clip_ratio/low_mean": 0.002031329222518252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039456178492400795, "epoch": 2.279965004374453, "grad_norm": 0.11556866765022278, "learning_rate": 1e-06, "loss": -0.0118, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3458.0, "completions/mean_length": 867.7254638671875, "completions/mean_terminated_length": 598.3772583007812, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.2822980460775737, "grad_norm": 0.25693705677986145, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 143427206.0, "reward": 0.5770089626312256, "reward_std": 0.14586800336837769, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 977 }, { "clip_ratio/high_max": 0.0024776747450232506, "clip_ratio/high_mean": 0.0009514283538010204, "clip_ratio/low_mean": 0.0007722742766418378, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017237026477232575, "epoch": 2.284631087780694, "grad_norm": 0.17322511970996857, "learning_rate": 1e-06, "loss": -0.0099, "step": 978 }, { "clip_ratio/high_max": 0.0032855732715688646, "clip_ratio/high_mean": 0.0013396914000622928, "clip_ratio/low_mean": 0.001168073571534478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025077649552258663, "epoch": 2.2869641294838146, "grad_norm": 0.12522724270820618, "learning_rate": 1e-06, "loss": -0.0102, "step": 979 }, { "clip_ratio/high_max": 0.003995811814093031, "clip_ratio/high_mean": 0.0016527393272554036, "clip_ratio/low_mean": 0.0016191830018215114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003271922301792074, "epoch": 2.289297171186935, "grad_norm": 0.09807094931602478, "learning_rate": 1e-06, "loss": -0.0104, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 860.4017944335938, "completions/mean_terminated_length": 603.11328125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.2916302128900554, "grad_norm": 0.3257005512714386, "learning_rate": 1e-06, "loss": -0.0417, "num_tokens": 144020782.0, "reward": 0.5569196939468384, "reward_std": 0.17848652601242065, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 981 }, { "clip_ratio/high_max": 0.003074881067732349, "clip_ratio/high_mean": 0.0011982016803813167, "clip_ratio/low_mean": 0.0012344926599325845, "clip_ratio/low_min": 9.006804521050071e-05, "clip_ratio/region_mean": 0.002432694280287251, "epoch": 2.2939632545931756, "grad_norm": 0.20245181024074554, "learning_rate": 1e-06, "loss": -0.0418, "step": 982 }, { "clip_ratio/high_max": 0.004064799715706613, "clip_ratio/high_mean": 0.001608440823474666, "clip_ratio/low_mean": 0.0017883843283925671, "clip_ratio/low_min": 0.00014406541049538646, "clip_ratio/region_mean": 0.0033968250354519114, "epoch": 2.2962962962962963, "grad_norm": 0.14981713891029358, "learning_rate": 1e-06, "loss": -0.0422, "step": 983 }, { "clip_ratio/high_max": 0.004826207739824895, "clip_ratio/high_mean": 0.0019289758020022418, "clip_ratio/low_mean": 0.0024491512376698665, "clip_ratio/low_min": 0.0001776724475348601, "clip_ratio/region_mean": 0.0043781270942417905, "epoch": 2.298629337999417, "grad_norm": 0.11771199107170105, "learning_rate": 1e-06, "loss": -0.0425, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1994.0, "completions/mean_length": 870.1283569335938, "completions/mean_terminated_length": 571.1451416015625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.300962379702537, "grad_norm": 0.2959081530570984, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 144584809.0, "reward": 0.5848214626312256, "reward_std": 0.17141281068325043, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 985 }, { "clip_ratio/high_max": 0.0034289229879505, "clip_ratio/high_mean": 0.0012806988306692801, "clip_ratio/low_mean": 0.0010748653003247455, "clip_ratio/low_min": 5.0382072004140355e-05, "clip_ratio/region_mean": 0.002355564123718068, "epoch": 2.303295421405658, "grad_norm": 0.18195629119873047, "learning_rate": 1e-06, "loss": -0.0296, "step": 986 }, { "clip_ratio/high_max": 0.004310492477088701, "clip_ratio/high_mean": 0.0016175189048226457, "clip_ratio/low_mean": 0.0016315010143443942, "clip_ratio/low_min": 0.00010932858276646584, "clip_ratio/region_mean": 0.003249019879149273, "epoch": 2.305628463108778, "grad_norm": 0.13629980385303497, "learning_rate": 1e-06, "loss": -0.0299, "step": 987 }, { "clip_ratio/high_max": 0.0050334651677985676, "clip_ratio/high_mean": 0.0019314193668833468, "clip_ratio/low_mean": 0.002226080025138799, "clip_ratio/low_min": 0.00010932858276646584, "clip_ratio/region_mean": 0.004157499453867786, "epoch": 2.3079615048118987, "grad_norm": 0.10960060358047485, "learning_rate": 1e-06, "loss": -0.0302, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2714.0, "completions/mean_length": 815.552490234375, "completions/mean_terminated_length": 550.4258422851562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.310294546515019, "grad_norm": 0.3173504173755646, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 145132112.0, "reward": 0.5837053656578064, "reward_std": 0.1535729467868805, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 989 }, { "clip_ratio/high_max": 0.0027791734246420674, "clip_ratio/high_mean": 0.0012030766374664381, "clip_ratio/low_mean": 0.001003384339128388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002206461002060678, "epoch": 2.3126275882181395, "grad_norm": 0.198551207780838, "learning_rate": 1e-06, "loss": -0.037, "step": 990 }, { "clip_ratio/high_max": 0.00395104251219891, "clip_ratio/high_mean": 0.0016067340184235945, "clip_ratio/low_mean": 0.001465802670281846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030725366959813982, "epoch": 2.3149606299212597, "grad_norm": 0.13706685602664948, "learning_rate": 1e-06, "loss": -0.0373, "step": 991 }, { "clip_ratio/high_max": 0.0047568896698066965, "clip_ratio/high_mean": 0.001931460890773451, "clip_ratio/low_mean": 0.0019244005961809307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038558615051442757, "epoch": 2.3172936716243804, "grad_norm": 0.11281009018421173, "learning_rate": 1e-06, "loss": -0.0375, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3262.0, "completions/mean_length": 857.8660888671875, "completions/mean_terminated_length": 579.1903076171875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.3196267133275006, "grad_norm": 0.25712868571281433, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 145700600.0, "reward": 0.606026828289032, "reward_std": 0.13249292969703674, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 993 }, { "clip_ratio/high_max": 0.003208918988093501, "clip_ratio/high_mean": 0.0010700633865781128, "clip_ratio/low_mean": 0.0007410268508465379, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018110902201442514, "epoch": 2.3219597550306212, "grad_norm": 0.17831414937973022, "learning_rate": 1e-06, "loss": -0.0309, "step": 994 }, { "clip_ratio/high_max": 0.0044528229045681655, "clip_ratio/high_mean": 0.0014157141176838195, "clip_ratio/low_mean": 0.0010962704891426256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002511984646844212, "epoch": 2.3242927967337415, "grad_norm": 0.12479043751955032, "learning_rate": 1e-06, "loss": -0.0311, "step": 995 }, { "clip_ratio/high_max": 0.005441093766421545, "clip_ratio/high_mean": 0.0017395365466654766, "clip_ratio/low_mean": 0.0015535709462710656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003293107496574521, "epoch": 2.326625838436862, "grad_norm": 0.10163594782352448, "learning_rate": 1e-06, "loss": -0.0313, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 778.6217041015625, "completions/mean_terminated_length": 557.463134765625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.3289588801399823, "grad_norm": 0.3132249116897583, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 146251933.0, "reward": 0.6595982313156128, "reward_std": 0.18423475325107574, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 997 }, { "clip_ratio/high_max": 0.0035422182336333208, "clip_ratio/high_mean": 0.0014149074850138277, "clip_ratio/low_mean": 0.0009679109589342261, "clip_ratio/low_min": 0.00011353746958775446, "clip_ratio/region_mean": 0.002382818449405022, "epoch": 2.331291921843103, "grad_norm": 0.1978176087141037, "learning_rate": 1e-06, "loss": -0.0493, "step": 998 }, { "clip_ratio/high_max": 0.004634974466171116, "clip_ratio/high_mean": 0.0018357506232860032, "clip_ratio/low_mean": 0.0014038572699064389, "clip_ratio/low_min": 0.00014242095494410023, "clip_ratio/region_mean": 0.0032396079041063786, "epoch": 2.3336249635462236, "grad_norm": 0.1460041105747223, "learning_rate": 1e-06, "loss": -0.0497, "step": 999 }, { "clip_ratio/high_max": 0.00551866578462068, "clip_ratio/high_mean": 0.0022352681116899475, "clip_ratio/low_mean": 0.002003313034947496, "clip_ratio/low_min": 0.00023108992900233716, "clip_ratio/region_mean": 0.004238581110257655, "epoch": 2.335958005249344, "grad_norm": 0.11759836971759796, "learning_rate": 1e-06, "loss": -0.05, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3652.0, "completions/mean_length": 852.9017944335938, "completions/mean_terminated_length": 547.9951171875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 2.338291046952464, "grad_norm": 0.4015813171863556, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 146792581.0, "reward": 0.5535714626312256, "reward_std": 0.17795631289482117, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 1001 }, { "clip_ratio/high_max": 0.0033500263416499365, "clip_ratio/high_mean": 0.0012333556660450995, "clip_ratio/low_mean": 0.0011686323323374381, "clip_ratio/low_min": 4.825043561140774e-05, "clip_ratio/region_mean": 0.0024019880438572727, "epoch": 2.3406240886555847, "grad_norm": 0.2709669768810272, "learning_rate": 1e-06, "loss": -0.0249, "step": 1002 }, { "clip_ratio/high_max": 0.00429432479722891, "clip_ratio/high_mean": 0.0016669586912030354, "clip_ratio/low_mean": 0.001887107213406125, "clip_ratio/low_min": 9.580298865330406e-05, "clip_ratio/region_mean": 0.003554065799107775, "epoch": 2.3429571303587053, "grad_norm": 0.14204944670200348, "learning_rate": 1e-06, "loss": -0.0254, "step": 1003 }, { "clip_ratio/high_max": 0.005315733782481402, "clip_ratio/high_mean": 0.0020475269229791593, "clip_ratio/low_mean": 0.0025601273591746576, "clip_ratio/low_min": 8.873996921465732e-05, "clip_ratio/region_mean": 0.00460765439493116, "epoch": 2.3452901720618256, "grad_norm": 0.11985014379024506, "learning_rate": 1e-06, "loss": -0.0256, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3120.0, "completions/mean_length": 818.021240234375, "completions/mean_terminated_length": 570.1068115234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.347623213764946, "grad_norm": 0.29815515875816345, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 147352808.0, "reward": 0.5725446939468384, "reward_std": 0.18584097921848297, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1005 }, { "clip_ratio/high_max": 0.0038093357434263453, "clip_ratio/high_mean": 0.001371762868075166, "clip_ratio/low_mean": 0.0012708666963590076, "clip_ratio/low_min": 8.378515667573083e-05, "clip_ratio/region_mean": 0.002642629566253163, "epoch": 2.3499562554680664, "grad_norm": 0.214836984872818, "learning_rate": 1e-06, "loss": -0.0459, "step": 1006 }, { "clip_ratio/high_max": 0.005364573386032134, "clip_ratio/high_mean": 0.0018275018119311426, "clip_ratio/low_mean": 0.0018586331916594645, "clip_ratio/low_min": 0.0001122395206039073, "clip_ratio/region_mean": 0.0036861349435639568, "epoch": 2.352289297171187, "grad_norm": 0.16491520404815674, "learning_rate": 1e-06, "loss": -0.0463, "step": 1007 }, { "clip_ratio/high_max": 0.006200419142260216, "clip_ratio/high_mean": 0.002232190854556393, "clip_ratio/low_mean": 0.002681330508494284, "clip_ratio/low_min": 0.00020867393686785363, "clip_ratio/region_mean": 0.004913521421258338, "epoch": 2.3546223388743073, "grad_norm": 0.13148236274719238, "learning_rate": 1e-06, "loss": -0.0466, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 830.3906860351562, "completions/mean_terminated_length": 591.8251342773438, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.356955380577428, "grad_norm": 0.31411510705947876, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 147932118.0, "reward": 0.6339285969734192, "reward_std": 0.14384031295776367, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1009 }, { "clip_ratio/high_max": 0.002819517503667157, "clip_ratio/high_mean": 0.0009731084282975644, "clip_ratio/low_mean": 0.0008743232674532919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018474317039363086, "epoch": 2.359288422280548, "grad_norm": 0.18095746636390686, "learning_rate": 1e-06, "loss": -0.0048, "step": 1010 }, { "clip_ratio/high_max": 0.00372962630353868, "clip_ratio/high_mean": 0.0012503485831985017, "clip_ratio/low_mean": 0.0012928696996823419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025432183101656847, "epoch": 2.361621463983669, "grad_norm": 0.1326814591884613, "learning_rate": 1e-06, "loss": -0.005, "step": 1011 }, { "clip_ratio/high_max": 0.004666901484597474, "clip_ratio/high_mean": 0.0015448136146005709, "clip_ratio/low_mean": 0.0017834821483120322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003328295875689946, "epoch": 2.363954505686789, "grad_norm": 0.09687606245279312, "learning_rate": 1e-06, "loss": -0.0053, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 841.1607666015625, "completions/mean_terminated_length": 624.1714477539062, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.3662875473899097, "grad_norm": 0.3063875436782837, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 148538878.0, "reward": 0.5457589626312256, "reward_std": 0.18141502141952515, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 1013 }, { "clip_ratio/high_max": 0.0027807048318209127, "clip_ratio/high_mean": 0.0010531487696425756, "clip_ratio/low_mean": 0.0011644352252915269, "clip_ratio/low_min": 3.065916916966671e-05, "clip_ratio/region_mean": 0.0022175840349518694, "epoch": 2.36862058909303, "grad_norm": 0.2081756442785263, "learning_rate": 1e-06, "loss": -0.0174, "step": 1014 }, { "clip_ratio/high_max": 0.004001321707619354, "clip_ratio/high_mean": 0.0014076707084313966, "clip_ratio/low_mean": 0.0017073952476494014, "clip_ratio/low_min": 8.791588152234908e-05, "clip_ratio/region_mean": 0.0031150659051490948, "epoch": 2.3709536307961505, "grad_norm": 0.14376379549503326, "learning_rate": 1e-06, "loss": -0.0178, "step": 1015 }, { "clip_ratio/high_max": 0.004599665924615692, "clip_ratio/high_mean": 0.0017331737035419792, "clip_ratio/low_mean": 0.0023616487596882507, "clip_ratio/low_min": 0.00011289324538665824, "clip_ratio/region_mean": 0.0040948225650936365, "epoch": 2.3732866724992707, "grad_norm": 0.11644219607114792, "learning_rate": 1e-06, "loss": -0.018, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2175.0, "completions/mean_length": 794.9754638671875, "completions/mean_terminated_length": 558.059814453125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.3756197142023914, "grad_norm": 0.29162803292274475, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 149090448.0, "reward": 0.613839328289032, "reward_std": 0.1405295431613922, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1017 }, { "clip_ratio/high_max": 0.0036549191863741726, "clip_ratio/high_mean": 0.0010849349891941529, "clip_ratio/low_mean": 0.0009356143114018778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020205492910463363, "epoch": 2.377952755905512, "grad_norm": 0.18158027529716492, "learning_rate": 1e-06, "loss": -0.0328, "step": 1018 }, { "clip_ratio/high_max": 0.00466604972461937, "clip_ratio/high_mean": 0.0014606683780584717, "clip_ratio/low_mean": 0.0014437575009651482, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002904425942688249, "epoch": 2.3802857976086322, "grad_norm": 0.1273406744003296, "learning_rate": 1e-06, "loss": -0.0331, "step": 1019 }, { "clip_ratio/high_max": 0.006208714272361249, "clip_ratio/high_mean": 0.0018532880767452298, "clip_ratio/low_mean": 0.001910078262881143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003763366417842917, "epoch": 2.382618839311753, "grad_norm": 0.11987081915140152, "learning_rate": 1e-06, "loss": -0.0333, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 771.6875610351562, "completions/mean_terminated_length": 591.7835083007812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.384951881014873, "grad_norm": 0.2600451409816742, "learning_rate": 1e-06, "loss": -0.011, "num_tokens": 149687160.0, "reward": 0.6160714626312256, "reward_std": 0.13598594069480896, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1021 }, { "clip_ratio/high_max": 0.0023381557693937793, "clip_ratio/high_mean": 0.0009928873732860666, "clip_ratio/low_mean": 0.0008246902525570476, "clip_ratio/low_min": 7.41048133932054e-05, "clip_ratio/region_mean": 0.0018175776713178493, "epoch": 2.3872849227179938, "grad_norm": 0.18240097165107727, "learning_rate": 1e-06, "loss": -0.0111, "step": 1022 }, { "clip_ratio/high_max": 0.003184251778293401, "clip_ratio/high_mean": 0.0013538576095015742, "clip_ratio/low_mean": 0.0011776503852161113, "clip_ratio/low_min": 4.667662506108172e-05, "clip_ratio/region_mean": 0.0025315079765277915, "epoch": 2.389617964421114, "grad_norm": 0.5047183036804199, "learning_rate": 1e-06, "loss": -0.0113, "step": 1023 }, { "clip_ratio/high_max": 0.003741693501069676, "clip_ratio/high_mean": 0.00159192433784483, "clip_ratio/low_mean": 0.001554143767862115, "clip_ratio/low_min": 9.335325012216344e-05, "clip_ratio/region_mean": 0.003146068163914606, "epoch": 2.3919510061242346, "grad_norm": 0.10050997138023376, "learning_rate": 1e-06, "loss": -0.0115, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 863.9564819335938, "completions/mean_terminated_length": 590.0545043945312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.394284047827355, "grad_norm": 0.26464805006980896, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 150263057.0, "reward": 0.5970982313156128, "reward_std": 0.15819180011749268, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 1025 }, { "clip_ratio/high_max": 0.003802422565058805, "clip_ratio/high_mean": 0.0014096090581006138, "clip_ratio/low_mean": 0.0009652753342379583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023748844178044237, "epoch": 2.3966170895304755, "grad_norm": 0.1932213306427002, "learning_rate": 1e-06, "loss": -0.0194, "step": 1026 }, { "clip_ratio/high_max": 0.003587095154216513, "clip_ratio/high_mean": 0.0015879234324529534, "clip_ratio/low_mean": 0.0014265637000789866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003014487127074972, "epoch": 2.3989501312335957, "grad_norm": 0.1440485268831253, "learning_rate": 1e-06, "loss": -0.0198, "step": 1027 }, { "clip_ratio/high_max": 0.004565332594211213, "clip_ratio/high_mean": 0.002022788921749452, "clip_ratio/low_mean": 0.0018373069615336135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038600958650931716, "epoch": 2.4012831729367163, "grad_norm": 0.12027224898338318, "learning_rate": 1e-06, "loss": -0.02, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 706.1495971679688, "completions/mean_terminated_length": 535.26611328125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.4036162146398365, "grad_norm": 0.34053853154182434, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 150804543.0, "reward": 0.6238839626312256, "reward_std": 0.20429165661334991, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 1029 }, { "clip_ratio/high_max": 0.003162131972203497, "clip_ratio/high_mean": 0.0013750297512160614, "clip_ratio/low_mean": 0.0011996002449450316, "clip_ratio/low_min": 7.283724698936567e-05, "clip_ratio/region_mean": 0.002574630045273807, "epoch": 2.405949256342957, "grad_norm": 0.22447893023490906, "learning_rate": 1e-06, "loss": -0.0139, "step": 1030 }, { "clip_ratio/high_max": 0.004086865184945054, "clip_ratio/high_mean": 0.001815404430089984, "clip_ratio/low_mean": 0.001895701378089143, "clip_ratio/low_min": 0.00011261732288403437, "clip_ratio/region_mean": 0.0037111057608854026, "epoch": 2.4082822980460774, "grad_norm": 0.16039365530014038, "learning_rate": 1e-06, "loss": -0.0143, "step": 1031 }, { "clip_ratio/high_max": 0.004898560611763969, "clip_ratio/high_mean": 0.002239122550236061, "clip_ratio/low_mean": 0.0024830453949107323, "clip_ratio/low_min": 0.00017482250041211955, "clip_ratio/region_mean": 0.004722167839645408, "epoch": 2.410615339749198, "grad_norm": 0.1284562051296234, "learning_rate": 1e-06, "loss": -0.0145, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2333.0, "completions/mean_length": 713.5670166015625, "completions/mean_terminated_length": 555.5093383789062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.4129483814523183, "grad_norm": 0.28918784856796265, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 151371595.0, "reward": 0.621651828289032, "reward_std": 0.16766269505023956, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 1033 }, { "clip_ratio/high_max": 0.003264218321419321, "clip_ratio/high_mean": 0.0013273253534862306, "clip_ratio/low_mean": 0.0010011833292082883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023285087518161163, "epoch": 2.415281423155439, "grad_norm": 0.19808898866176605, "learning_rate": 1e-06, "loss": -0.0333, "step": 1034 }, { "clip_ratio/high_max": 0.004625303583452478, "clip_ratio/high_mean": 0.001847638424806064, "clip_ratio/low_mean": 0.0015420400559378322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033896783425007015, "epoch": 2.417614464858559, "grad_norm": 0.14128273725509644, "learning_rate": 1e-06, "loss": -0.0336, "step": 1035 }, { "clip_ratio/high_max": 0.005169005642528646, "clip_ratio/high_mean": 0.0021258723354549147, "clip_ratio/low_mean": 0.00210681775570265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004232690072967671, "epoch": 2.41994750656168, "grad_norm": 0.11212188750505447, "learning_rate": 1e-06, "loss": -0.0339, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 814.896240234375, "completions/mean_terminated_length": 575.1987915039062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 2.4222805482648004, "grad_norm": 0.2794017493724823, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 151943518.0, "reward": 0.5301339626312256, "reward_std": 0.1726948320865631, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1037 }, { "clip_ratio/high_max": 0.002562400317401625, "clip_ratio/high_mean": 0.001061193454916065, "clip_ratio/low_mean": 0.0012074264086550102, "clip_ratio/low_min": 5.487269663717598e-05, "clip_ratio/region_mean": 0.0022686198935844004, "epoch": 2.4246135899679206, "grad_norm": 0.1821690797805786, "learning_rate": 1e-06, "loss": -0.0196, "step": 1038 }, { "clip_ratio/high_max": 0.003560554796422366, "clip_ratio/high_mean": 0.0014375716218637535, "clip_ratio/low_mean": 0.0016191082977456972, "clip_ratio/low_min": 4.115452247788198e-05, "clip_ratio/region_mean": 0.0030566799687221646, "epoch": 2.4269466316710413, "grad_norm": 0.143729567527771, "learning_rate": 1e-06, "loss": -0.02, "step": 1039 }, { "clip_ratio/high_max": 0.004737823015602771, "clip_ratio/high_mean": 0.0017852852870419156, "clip_ratio/low_mean": 0.0022528952722495887, "clip_ratio/low_min": 7.278726843651384e-05, "clip_ratio/region_mean": 0.004038180661154911, "epoch": 2.4292796733741615, "grad_norm": 0.1117033064365387, "learning_rate": 1e-06, "loss": -0.0202, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3092.0, "completions/mean_length": 761.8928833007812, "completions/mean_terminated_length": 585.5886840820312, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 2.431612715077282, "grad_norm": 0.3087382912635803, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 152537134.0, "reward": 0.5859375, "reward_std": 0.1717958152294159, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1041 }, { "clip_ratio/high_max": 0.002987992818816565, "clip_ratio/high_mean": 0.0011159214009239804, "clip_ratio/low_mean": 0.000978822627075715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00209474401344778, "epoch": 2.4339457567804024, "grad_norm": 0.2627030313014984, "learning_rate": 1e-06, "loss": -0.0318, "step": 1042 }, { "clip_ratio/high_max": 0.0040967139502754435, "clip_ratio/high_mean": 0.0014933395395928528, "clip_ratio/low_mean": 0.0014490817229670938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00294242131349165, "epoch": 2.436278798483523, "grad_norm": 0.13277971744537354, "learning_rate": 1e-06, "loss": -0.0322, "step": 1043 }, { "clip_ratio/high_max": 0.005063831442384981, "clip_ratio/high_mean": 0.0017772947394405492, "clip_ratio/low_mean": 0.0020204046013532206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037976994062773883, "epoch": 2.4386118401866432, "grad_norm": 0.11958345025777817, "learning_rate": 1e-06, "loss": -0.0324, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 788.2042846679688, "completions/mean_terminated_length": 550.8026123046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.440944881889764, "grad_norm": 0.3247753083705902, "learning_rate": 1e-06, "loss": -0.0201, "num_tokens": 153086445.0, "reward": 0.5301339626312256, "reward_std": 0.1579296588897705, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1045 }, { "clip_ratio/high_max": 0.0025208994557033293, "clip_ratio/high_mean": 0.0009242404175893171, "clip_ratio/low_mean": 0.0009571269110892899, "clip_ratio/low_min": 3.4578148188302293e-05, "clip_ratio/region_mean": 0.0018813672941178083, "epoch": 2.443277923592884, "grad_norm": 0.1906348615884781, "learning_rate": 1e-06, "loss": -0.0202, "step": 1046 }, { "clip_ratio/high_max": 0.0031910837860777974, "clip_ratio/high_mean": 0.0012728060082736192, "clip_ratio/low_mean": 0.001656089747484657, "clip_ratio/low_min": 0.00011156158507219516, "clip_ratio/region_mean": 0.0029288957739481702, "epoch": 2.4456109652960047, "grad_norm": 0.1336827576160431, "learning_rate": 1e-06, "loss": -0.0205, "step": 1047 }, { "clip_ratio/high_max": 0.0038916826888453215, "clip_ratio/high_mean": 0.0015823885696590878, "clip_ratio/low_mean": 0.002292922683409415, "clip_ratio/low_min": 0.00014761991769773886, "clip_ratio/region_mean": 0.0038753113331040367, "epoch": 2.447944006999125, "grad_norm": 0.10803224891424179, "learning_rate": 1e-06, "loss": -0.0207, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 724.0178833007812, "completions/mean_terminated_length": 554.03515625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 2.4502770487022456, "grad_norm": 0.2474566102027893, "learning_rate": 1e-06, "loss": -0.0174, "num_tokens": 153643845.0, "reward": 0.629464328289032, "reward_std": 0.12598443031311035, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1049 }, { "clip_ratio/high_max": 0.0028653112167376094, "clip_ratio/high_mean": 0.0010218269417237025, "clip_ratio/low_mean": 0.0008725939605938038, "clip_ratio/low_min": 7.492721488233656e-05, "clip_ratio/region_mean": 0.0018944208859466016, "epoch": 2.452610090405366, "grad_norm": 0.17991343140602112, "learning_rate": 1e-06, "loss": -0.0175, "step": 1050 }, { "clip_ratio/high_max": 0.0033059757042792626, "clip_ratio/high_mean": 0.001222067721755593, "clip_ratio/low_mean": 0.001259532647964079, "clip_ratio/low_min": 7.492721488233656e-05, "clip_ratio/region_mean": 0.002481600364262704, "epoch": 2.4549431321084865, "grad_norm": 0.1296132355928421, "learning_rate": 1e-06, "loss": -0.0178, "step": 1051 }, { "clip_ratio/high_max": 0.003802577128226403, "clip_ratio/high_mean": 0.0014699396160722245, "clip_ratio/low_mean": 0.0016939273227762897, "clip_ratio/low_min": 6.422332808142528e-05, "clip_ratio/region_mean": 0.003163866938848514, "epoch": 2.457276173811607, "grad_norm": 0.10071777552366257, "learning_rate": 1e-06, "loss": -0.0179, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 767.7120971679688, "completions/mean_terminated_length": 575.1664428710938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.4596092155147273, "grad_norm": 0.28964322805404663, "learning_rate": 1e-06, "loss": -0.0239, "num_tokens": 154225243.0, "reward": 0.6116071939468384, "reward_std": 0.16901634633541107, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1053 }, { "clip_ratio/high_max": 0.0029480768789653666, "clip_ratio/high_mean": 0.0010633877973305061, "clip_ratio/low_mean": 0.001049758659064537, "clip_ratio/low_min": 2.3937189325806685e-05, "clip_ratio/region_mean": 0.0021131464454811066, "epoch": 2.4619422572178475, "grad_norm": 0.18854869902133942, "learning_rate": 1e-06, "loss": -0.0235, "step": 1054 }, { "clip_ratio/high_max": 0.003991086050518788, "clip_ratio/high_mean": 0.001428112209396204, "clip_ratio/low_mean": 0.0014330233934742864, "clip_ratio/low_min": 5.8576022638590075e-05, "clip_ratio/region_mean": 0.002861135588318575, "epoch": 2.464275298920968, "grad_norm": 0.14923107624053955, "learning_rate": 1e-06, "loss": -0.0238, "step": 1055 }, { "clip_ratio/high_max": 0.004766242986079305, "clip_ratio/high_mean": 0.0017373336449963972, "clip_ratio/low_mean": 0.002014619982219301, "clip_ratio/low_min": 0.00011799667845480144, "clip_ratio/region_mean": 0.0037519536272156984, "epoch": 2.466608340624089, "grad_norm": 0.11867931485176086, "learning_rate": 1e-06, "loss": -0.0241, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 873.6473388671875, "completions/mean_terminated_length": 574.990234375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.468941382327209, "grad_norm": 0.25968077778816223, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 154785479.0, "reward": 0.5993303656578064, "reward_std": 0.11888891458511353, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1057 }, { "clip_ratio/high_max": 0.002135501432348974, "clip_ratio/high_mean": 0.0007720635148871224, "clip_ratio/low_mean": 0.000697759975992085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014698235063406173, "epoch": 2.4712744240303297, "grad_norm": 0.16340304911136627, "learning_rate": 1e-06, "loss": -0.0255, "step": 1058 }, { "clip_ratio/high_max": 0.002744408506259788, "clip_ratio/high_mean": 0.0010452386650285916, "clip_ratio/low_mean": 0.001065333271981217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002110571960656671, "epoch": 2.47360746573345, "grad_norm": 0.11369729042053223, "learning_rate": 1e-06, "loss": -0.0258, "step": 1059 }, { "clip_ratio/high_max": 0.003470261173788458, "clip_ratio/high_mean": 0.0012793511559721082, "clip_ratio/low_mean": 0.0015054304403747665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027847816090798005, "epoch": 2.4759405074365706, "grad_norm": 0.09186240285634995, "learning_rate": 1e-06, "loss": -0.0259, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2097.0, "completions/mean_length": 805.4241333007812, "completions/mean_terminated_length": 560.8009643554688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.478273549139691, "grad_norm": 0.3037368357181549, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 155336531.0, "reward": 0.6194196939468384, "reward_std": 0.16634675860404968, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1061 }, { "clip_ratio/high_max": 0.0023709857268841006, "clip_ratio/high_mean": 0.0010360522828705143, "clip_ratio/low_mean": 0.0010782685421872884, "clip_ratio/low_min": 3.767329690163024e-05, "clip_ratio/region_mean": 0.002114320799591951, "epoch": 2.4806065908428114, "grad_norm": 0.22804927825927734, "learning_rate": 1e-06, "loss": -0.0035, "step": 1062 }, { "clip_ratio/high_max": 0.003120264154858887, "clip_ratio/high_mean": 0.001321113551966846, "clip_ratio/low_mean": 0.0016703110341040883, "clip_ratio/low_min": 3.767329690163024e-05, "clip_ratio/region_mean": 0.0029914245824329555, "epoch": 2.4829396325459316, "grad_norm": 0.15553656220436096, "learning_rate": 1e-06, "loss": -0.004, "step": 1063 }, { "clip_ratio/high_max": 0.0037984287046128884, "clip_ratio/high_mean": 0.0016010045874281786, "clip_ratio/low_mean": 0.0022939012123970315, "clip_ratio/low_min": 5.650994717143476e-05, "clip_ratio/region_mean": 0.003894905748893507, "epoch": 2.4852726742490523, "grad_norm": 0.11585551500320435, "learning_rate": 1e-06, "loss": -0.0042, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3777.0, "completions/mean_length": 786.5223388671875, "completions/mean_terminated_length": 590.9266967773438, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 2.4876057159521725, "grad_norm": 0.29605942964553833, "learning_rate": 1e-06, "loss": -0.0085, "num_tokens": 155922663.0, "reward": 0.6104910969734192, "reward_std": 0.15022864937782288, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1065 }, { "clip_ratio/high_max": 0.002890609801397659, "clip_ratio/high_mean": 0.0012244143144926056, "clip_ratio/low_mean": 0.000829600052384194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020540143668767996, "epoch": 2.489938757655293, "grad_norm": 0.19091498851776123, "learning_rate": 1e-06, "loss": -0.0086, "step": 1066 }, { "clip_ratio/high_max": 0.0038265396942733787, "clip_ratio/high_mean": 0.0016040206246543676, "clip_ratio/low_mean": 0.0013420886498352047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029461092926794663, "epoch": 2.4922717993584134, "grad_norm": 0.13746377825737, "learning_rate": 1e-06, "loss": -0.0089, "step": 1067 }, { "clip_ratio/high_max": 0.0045402543764794245, "clip_ratio/high_mean": 0.0018906939731095918, "clip_ratio/low_mean": 0.0017647529020905495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003655446882476099, "epoch": 2.494604841061534, "grad_norm": 0.1171862781047821, "learning_rate": 1e-06, "loss": -0.0091, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 915.8795166015625, "completions/mean_terminated_length": 638.0048828125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.4969378827646542, "grad_norm": 0.26330357789993286, "learning_rate": 1e-06, "loss": -0.0126, "num_tokens": 156547355.0, "reward": 0.5412946939468384, "reward_std": 0.16360372304916382, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1069 }, { "clip_ratio/high_max": 0.0027068074996350333, "clip_ratio/high_mean": 0.0010278240879415534, "clip_ratio/low_mean": 0.0008681679664732656, "clip_ratio/low_min": 1.686909672571346e-05, "clip_ratio/region_mean": 0.0018959920780616812, "epoch": 2.499270924467775, "grad_norm": 0.20774012804031372, "learning_rate": 1e-06, "loss": -0.0126, "step": 1070 }, { "clip_ratio/high_max": 0.003633194231952075, "clip_ratio/high_mean": 0.0013518992018362042, "clip_ratio/low_mean": 0.001254269365745131, "clip_ratio/low_min": 1.686909672571346e-05, "clip_ratio/region_mean": 0.002606168585771229, "epoch": 2.5016039661708955, "grad_norm": 0.13715651631355286, "learning_rate": 1e-06, "loss": -0.0129, "step": 1071 }, { "clip_ratio/high_max": 0.0043079474053229205, "clip_ratio/high_mean": 0.0015909263820503838, "clip_ratio/low_mean": 0.0016765490545367356, "clip_ratio/low_min": 3.373819345142692e-05, "clip_ratio/region_mean": 0.003267475389293395, "epoch": 2.5039370078740157, "grad_norm": 0.1067027896642685, "learning_rate": 1e-06, "loss": -0.0131, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3241.0, "completions/mean_length": 727.6361694335938, "completions/mean_terminated_length": 566.1122436523438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.506270049577136, "grad_norm": 0.27734288573265076, "learning_rate": 1e-06, "loss": -0.0314, "num_tokens": 157116221.0, "reward": 0.6517857313156128, "reward_std": 0.15529470145702362, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1073 }, { "clip_ratio/high_max": 0.002997396564751398, "clip_ratio/high_mean": 0.0011699941314873286, "clip_ratio/low_mean": 0.0009021184605444432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020721126056741923, "epoch": 2.5086030912802566, "grad_norm": 0.20268534123897552, "learning_rate": 1e-06, "loss": -0.0315, "step": 1074 }, { "clip_ratio/high_max": 0.003807239540037699, "clip_ratio/high_mean": 0.0015222434667521156, "clip_ratio/low_mean": 0.0013570738628914114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00287931728962576, "epoch": 2.5109361329833773, "grad_norm": 0.33463749289512634, "learning_rate": 1e-06, "loss": -0.0318, "step": 1075 }, { "clip_ratio/high_max": 0.004653554715332575, "clip_ratio/high_mean": 0.0019292644610686693, "clip_ratio/low_mean": 0.0018876706453738734, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003816935117356479, "epoch": 2.5132691746864975, "grad_norm": 0.11448302119970322, "learning_rate": 1e-06, "loss": -0.0321, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 813.2756958007812, "completions/mean_terminated_length": 586.0704345703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.515602216389618, "grad_norm": 0.3155544102191925, "learning_rate": 1e-06, "loss": -0.0349, "num_tokens": 157701124.0, "reward": 0.5993303656578064, "reward_std": 0.1497400999069214, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1077 }, { "clip_ratio/high_max": 0.0031389233699883334, "clip_ratio/high_mean": 0.0011203710973859415, "clip_ratio/low_mean": 0.000950996112806024, "clip_ratio/low_min": 3.239013221900677e-05, "clip_ratio/region_mean": 0.002071367132884916, "epoch": 2.5179352580927383, "grad_norm": 0.21796831488609314, "learning_rate": 1e-06, "loss": -0.035, "step": 1078 }, { "clip_ratio/high_max": 0.003695586034154985, "clip_ratio/high_mean": 0.0013653955793415662, "clip_ratio/low_mean": 0.0015059499164635781, "clip_ratio/low_min": 0.00015608488320140168, "clip_ratio/region_mean": 0.002871345466701314, "epoch": 2.520268299795859, "grad_norm": 0.14433956146240234, "learning_rate": 1e-06, "loss": -0.0353, "step": 1079 }, { "clip_ratio/high_max": 0.0045968017730046995, "clip_ratio/high_mean": 0.001680445186138968, "clip_ratio/low_mean": 0.001973265316337347, "clip_ratio/low_min": 0.00020485442655626684, "clip_ratio/region_mean": 0.0036537104679155163, "epoch": 2.522601341498979, "grad_norm": 0.11486305296421051, "learning_rate": 1e-06, "loss": -0.0355, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 799.3795166015625, "completions/mean_terminated_length": 592.11865234375, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.5249343832021, "grad_norm": 0.345584899187088, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 158311280.0, "reward": 0.5613839626312256, "reward_std": 0.15646544098854065, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1081 }, { "clip_ratio/high_max": 0.005575609298830386, "clip_ratio/high_mean": 0.001612366193512571, "clip_ratio/low_mean": 0.0009921136361299432, "clip_ratio/low_min": 2.9076529244775884e-05, "clip_ratio/region_mean": 0.002604479763249401, "epoch": 2.52726742490522, "grad_norm": 1.624076247215271, "learning_rate": 1e-06, "loss": -0.0117, "step": 1082 }, { "clip_ratio/high_max": 0.00628514307027217, "clip_ratio/high_mean": 0.0019201068353140727, "clip_ratio/low_mean": 0.00151008740067482, "clip_ratio/low_min": 2.9076529244775884e-05, "clip_ratio/region_mean": 0.0034301941996091045, "epoch": 2.5296004666083407, "grad_norm": 0.19106411933898926, "learning_rate": 1e-06, "loss": -0.0122, "step": 1083 }, { "clip_ratio/high_max": 0.006659940554527566, "clip_ratio/high_mean": 0.002151996159227565, "clip_ratio/low_mean": 0.0019737910442927387, "clip_ratio/low_min": 6.435006798710674e-05, "clip_ratio/region_mean": 0.0041257871489506215, "epoch": 2.531933508311461, "grad_norm": 0.1398921012878418, "learning_rate": 1e-06, "loss": -0.0124, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 823.0234985351562, "completions/mean_terminated_length": 575.4873657226562, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 2.5342665500145816, "grad_norm": 0.350163996219635, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 158885749.0, "reward": 0.606026828289032, "reward_std": 0.16793116927146912, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1085 }, { "clip_ratio/high_max": 0.003848471853416413, "clip_ratio/high_mean": 0.0013531698641600087, "clip_ratio/low_mean": 0.0017304592802247498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003083629038883373, "epoch": 2.536599591717702, "grad_norm": 0.3339444696903229, "learning_rate": 1e-06, "loss": -0.0295, "step": 1086 }, { "clip_ratio/high_max": 0.004930611539748497, "clip_ratio/high_mean": 0.001797224369511241, "clip_ratio/low_mean": 0.0024601256409368943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00425734999589622, "epoch": 2.5389326334208224, "grad_norm": 0.29226914048194885, "learning_rate": 1e-06, "loss": -0.0299, "step": 1087 }, { "clip_ratio/high_max": 0.005953012121608481, "clip_ratio/high_mean": 0.0021998797783453483, "clip_ratio/low_mean": 0.003039024282770697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005238904079305939, "epoch": 2.5412656751239426, "grad_norm": 0.24573369324207306, "learning_rate": 1e-06, "loss": -0.0301, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 793.5736694335938, "completions/mean_terminated_length": 581.7791137695312, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.5435987168270633, "grad_norm": 0.3311831057071686, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 159457743.0, "reward": 0.5926339626312256, "reward_std": 0.19513778388500214, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 1089 }, { "clip_ratio/high_max": 0.003931187566195149, "clip_ratio/high_mean": 0.0015525297130807303, "clip_ratio/low_mean": 0.001172068135929294, "clip_ratio/low_min": 1.2820512893085834e-05, "clip_ratio/region_mean": 0.0027245978999417275, "epoch": 2.545931758530184, "grad_norm": 0.21334409713745117, "learning_rate": 1e-06, "loss": -0.0406, "step": 1090 }, { "clip_ratio/high_max": 0.0049241842061746866, "clip_ratio/high_mean": 0.0020290440224925987, "clip_ratio/low_mean": 0.0016544076970603783, "clip_ratio/low_min": 5.1282051572343335e-05, "clip_ratio/region_mean": 0.0036834517813986167, "epoch": 2.548264800233304, "grad_norm": 0.15283729135990143, "learning_rate": 1e-06, "loss": -0.041, "step": 1091 }, { "clip_ratio/high_max": 0.0058032793895108625, "clip_ratio/high_mean": 0.0023768597893649712, "clip_ratio/low_mean": 0.0023178711198852398, "clip_ratio/low_min": 5.1282051572343335e-05, "clip_ratio/region_mean": 0.00469473103294149, "epoch": 2.5505978419364244, "grad_norm": 0.1183098778128624, "learning_rate": 1e-06, "loss": -0.0412, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 872.3192138671875, "completions/mean_terminated_length": 645.0823974609375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 2.552930883639545, "grad_norm": 0.2866666316986084, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 160082485.0, "reward": 0.5479910969734192, "reward_std": 0.17127293348312378, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1093 }, { "clip_ratio/high_max": 0.0028096948444726877, "clip_ratio/high_mean": 0.0011181901863892563, "clip_ratio/low_mean": 0.0010909732518484816, "clip_ratio/low_min": 9.11211554921465e-06, "clip_ratio/region_mean": 0.002209163452789653, "epoch": 2.5552639253426657, "grad_norm": 0.19223226606845856, "learning_rate": 1e-06, "loss": -0.0195, "step": 1094 }, { "clip_ratio/high_max": 0.0033654441504040733, "clip_ratio/high_mean": 0.0014265891077229753, "clip_ratio/low_mean": 0.0017713801280478947, "clip_ratio/low_min": 1.82242310984293e-05, "clip_ratio/region_mean": 0.0031979693012544885, "epoch": 2.557596967045786, "grad_norm": 0.14704082906246185, "learning_rate": 1e-06, "loss": -0.0199, "step": 1095 }, { "clip_ratio/high_max": 0.004293163772672415, "clip_ratio/high_mean": 0.0017444030527258292, "clip_ratio/low_mean": 0.0021700088764191605, "clip_ratio/low_min": 4.556057683657855e-05, "clip_ratio/region_mean": 0.003914411907317117, "epoch": 2.5599300087489065, "grad_norm": 0.11653164029121399, "learning_rate": 1e-06, "loss": -0.0201, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2548.0, "completions/mean_length": 833.404052734375, "completions/mean_terminated_length": 578.2069702148438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.5622630504520267, "grad_norm": 0.33775022625923157, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 160650055.0, "reward": 0.5424107313156128, "reward_std": 0.2035776823759079, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1097 }, { "clip_ratio/high_max": 0.0029904611146776006, "clip_ratio/high_mean": 0.0013147650333849015, "clip_ratio/low_mean": 0.0014533434914483223, "clip_ratio/low_min": 0.00011978569546045037, "clip_ratio/region_mean": 0.002768108490272425, "epoch": 2.5645960921551474, "grad_norm": 1.1509780883789062, "learning_rate": 1e-06, "loss": -0.0216, "step": 1098 }, { "clip_ratio/high_max": 0.004144865441048751, "clip_ratio/high_mean": 0.0018176337262048037, "clip_ratio/low_mean": 0.0021568329830188304, "clip_ratio/low_min": 0.00026173672085860744, "clip_ratio/region_mean": 0.003974466832005419, "epoch": 2.5669291338582676, "grad_norm": 0.17197741568088531, "learning_rate": 1e-06, "loss": -0.022, "step": 1099 }, { "clip_ratio/high_max": 0.004748230101540685, "clip_ratio/high_mean": 0.0021249136407277547, "clip_ratio/low_mean": 0.002907815658545587, "clip_ratio/low_min": 0.00027799932649941184, "clip_ratio/region_mean": 0.0050327292701695114, "epoch": 2.5692621755613883, "grad_norm": 0.1491142213344574, "learning_rate": 1e-06, "loss": -0.0223, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2076.0, "completions/mean_length": 858.4241333007812, "completions/mean_terminated_length": 575.5291137695312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.5715952172645085, "grad_norm": 0.3079274296760559, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 161221563.0, "reward": 0.6439732313156128, "reward_std": 0.14632193744182587, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1101 }, { "clip_ratio/high_max": 0.0032717546346248128, "clip_ratio/high_mean": 0.0012973258817510214, "clip_ratio/low_mean": 0.0009182204366879887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002215546373918187, "epoch": 2.573928258967629, "grad_norm": 0.19289128482341766, "learning_rate": 1e-06, "loss": -0.0168, "step": 1102 }, { "clip_ratio/high_max": 0.0042270737467333674, "clip_ratio/high_mean": 0.0016167840622074436, "clip_ratio/low_mean": 0.0012921032821395784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002908887283410877, "epoch": 2.5762613006707493, "grad_norm": 0.1452503502368927, "learning_rate": 1e-06, "loss": -0.0171, "step": 1103 }, { "clip_ratio/high_max": 0.005087190322228707, "clip_ratio/high_mean": 0.0019440584219410084, "clip_ratio/low_mean": 0.0018177765105065191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00376183489424875, "epoch": 2.57859434237387, "grad_norm": 0.11627428978681564, "learning_rate": 1e-06, "loss": -0.0173, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 826.4453735351562, "completions/mean_terminated_length": 562.1990356445312, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.5809273840769906, "grad_norm": 0.2613348662853241, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 161774562.0, "reward": 0.5926339626312256, "reward_std": 0.1383524388074875, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 1105 }, { "clip_ratio/high_max": 0.0022318987830658443, "clip_ratio/high_mean": 0.0008069053546932992, "clip_ratio/low_mean": 0.0008397562414756976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016466615634271875, "epoch": 2.583260425780111, "grad_norm": 0.5397358536720276, "learning_rate": 1e-06, "loss": -0.004, "step": 1106 }, { "clip_ratio/high_max": 0.002918021535151638, "clip_ratio/high_mean": 0.0010527163904043846, "clip_ratio/low_mean": 0.001265492941456614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023182092918432318, "epoch": 2.585593467483231, "grad_norm": 0.11904197931289673, "learning_rate": 1e-06, "loss": -0.0043, "step": 1107 }, { "clip_ratio/high_max": 0.003772616939386353, "clip_ratio/high_mean": 0.0013279205668368377, "clip_ratio/low_mean": 0.0016410615971835796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002968982240417972, "epoch": 2.5879265091863517, "grad_norm": 0.09788796305656433, "learning_rate": 1e-06, "loss": -0.0045, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 897.9397583007812, "completions/mean_terminated_length": 597.2673950195312, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.5902595508894724, "grad_norm": 0.34306058287620544, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 162356316.0, "reward": 0.551339328289032, "reward_std": 0.15857087075710297, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1109 }, { "clip_ratio/high_max": 0.0027331740930094384, "clip_ratio/high_mean": 0.0011854879048769362, "clip_ratio/low_mean": 0.00094071699277265, "clip_ratio/low_min": 3.4935717849293724e-05, "clip_ratio/region_mean": 0.0021262049704091623, "epoch": 2.5925925925925926, "grad_norm": 0.1935845911502838, "learning_rate": 1e-06, "loss": -0.0182, "step": 1110 }, { "clip_ratio/high_max": 0.0034374186434433796, "clip_ratio/high_mean": 0.0015694707362854388, "clip_ratio/low_mean": 0.0013292152434587479, "clip_ratio/low_min": 1.1645239283097908e-05, "clip_ratio/region_mean": 0.002898686027037911, "epoch": 2.5949256342957128, "grad_norm": 0.12884077429771423, "learning_rate": 1e-06, "loss": -0.0185, "step": 1111 }, { "clip_ratio/high_max": 0.004478572896914557, "clip_ratio/high_mean": 0.0018549358937889338, "clip_ratio/low_mean": 0.0018071835038426798, "clip_ratio/low_min": 3.4935717849293724e-05, "clip_ratio/region_mean": 0.0036621194158215076, "epoch": 2.5972586759988334, "grad_norm": 0.10767252743244171, "learning_rate": 1e-06, "loss": -0.0187, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 886.864990234375, "completions/mean_terminated_length": 589.4329223632812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.599591717701954, "grad_norm": 0.2987234890460968, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 162926675.0, "reward": 0.6227678656578064, "reward_std": 0.1587986946105957, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1113 }, { "clip_ratio/high_max": 0.0027459955708764028, "clip_ratio/high_mean": 0.0011337052474118536, "clip_ratio/low_mean": 0.000820765320895589, "clip_ratio/low_min": 3.939075759262778e-05, "clip_ratio/region_mean": 0.001954470601049252, "epoch": 2.6019247594050743, "grad_norm": 0.190657839179039, "learning_rate": 1e-06, "loss": -0.0428, "step": 1114 }, { "clip_ratio/high_max": 0.0033297136396868154, "clip_ratio/high_mean": 0.001389202885548002, "clip_ratio/low_mean": 0.0012262786003702786, "clip_ratio/low_min": 5.283178325043991e-05, "clip_ratio/region_mean": 0.002615481505927164, "epoch": 2.604257801108195, "grad_norm": 0.13383787870407104, "learning_rate": 1e-06, "loss": -0.0432, "step": 1115 }, { "clip_ratio/high_max": 0.0041082084499066696, "clip_ratio/high_mean": 0.0017446737365389708, "clip_ratio/low_mean": 0.0016148812173923943, "clip_ratio/low_min": 4.226542660035193e-05, "clip_ratio/region_mean": 0.0033595548738958314, "epoch": 2.606590842811315, "grad_norm": 0.10907389968633652, "learning_rate": 1e-06, "loss": -0.0434, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3151.0, "completions/mean_length": 803.5089721679688, "completions/mean_terminated_length": 567.2057495117188, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 2.608923884514436, "grad_norm": 0.3283275365829468, "learning_rate": 1e-06, "loss": -0.0406, "num_tokens": 163485243.0, "reward": 0.5881696939468384, "reward_std": 0.1761464774608612, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 1117 }, { "clip_ratio/high_max": 0.00374542735517025, "clip_ratio/high_mean": 0.0013177727232687175, "clip_ratio/low_mean": 0.0009153561750281369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022331289219437167, "epoch": 2.611256926217556, "grad_norm": 0.19675523042678833, "learning_rate": 1e-06, "loss": -0.0408, "step": 1118 }, { "clip_ratio/high_max": 0.0049266470814473, "clip_ratio/high_mean": 0.001697329538728809, "clip_ratio/low_mean": 0.001339159811323043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030364893973455764, "epoch": 2.6135899679206767, "grad_norm": 0.14947852492332458, "learning_rate": 1e-06, "loss": -0.0411, "step": 1119 }, { "clip_ratio/high_max": 0.006256492335523944, "clip_ratio/high_mean": 0.002049953083769651, "clip_ratio/low_mean": 0.001770440758264158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038203938311198726, "epoch": 2.615923009623797, "grad_norm": 0.12289398163557053, "learning_rate": 1e-06, "loss": -0.0413, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 768.1875610351562, "completions/mean_terminated_length": 503.5662841796875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.6182560513269175, "grad_norm": 0.45103728771209717, "learning_rate": 1e-06, "loss": -0.042, "num_tokens": 163984123.0, "reward": 0.6116071939468384, "reward_std": 0.18896587193012238, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 1121 }, { "clip_ratio/high_max": 0.003312238783109933, "clip_ratio/high_mean": 0.001537856223876588, "clip_ratio/low_mean": 0.0012754857962136157, "clip_ratio/low_min": 8.466951476293616e-05, "clip_ratio/region_mean": 0.002813342034642119, "epoch": 2.6205890930300377, "grad_norm": 0.2174340784549713, "learning_rate": 1e-06, "loss": -0.0422, "step": 1122 }, { "clip_ratio/high_max": 0.004384795640362427, "clip_ratio/high_mean": 0.00199497421272099, "clip_ratio/low_mean": 0.0019955076786573045, "clip_ratio/low_min": 0.00017309342001681216, "clip_ratio/region_mean": 0.003990481869550422, "epoch": 2.6229221347331584, "grad_norm": 0.16953922808170319, "learning_rate": 1e-06, "loss": -0.0426, "step": 1123 }, { "clip_ratio/high_max": 0.005155437786015682, "clip_ratio/high_mean": 0.002387687381997239, "clip_ratio/low_mean": 0.002651596034411341, "clip_ratio/low_min": 0.00022866368817631155, "clip_ratio/region_mean": 0.005039283467340283, "epoch": 2.625255176436279, "grad_norm": 0.13442985713481903, "learning_rate": 1e-06, "loss": -0.0429, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3118.0, "completions/mean_length": 789.3471069335938, "completions/mean_terminated_length": 585.6196899414062, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.6275882181393992, "grad_norm": 0.3350958526134491, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 164563050.0, "reward": 0.668526828289032, "reward_std": 0.17322292923927307, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1125 }, { "clip_ratio/high_max": 0.0037342222058214247, "clip_ratio/high_mean": 0.0013656187693413813, "clip_ratio/low_mean": 0.0012035143245157087, "clip_ratio/low_min": 5.504986438609194e-05, "clip_ratio/region_mean": 0.0025691330520203337, "epoch": 2.6299212598425195, "grad_norm": 0.25795790553092957, "learning_rate": 1e-06, "loss": 0.002, "step": 1126 }, { "clip_ratio/high_max": 0.005171180404431652, "clip_ratio/high_mean": 0.0018295465124538168, "clip_ratio/low_mean": 0.0016464981163153425, "clip_ratio/low_min": 9.627348299545702e-05, "clip_ratio/region_mean": 0.003476044614217244, "epoch": 2.63225430154564, "grad_norm": 0.19608137011528015, "learning_rate": 1e-06, "loss": 0.0016, "step": 1127 }, { "clip_ratio/high_max": 0.005751778153353371, "clip_ratio/high_mean": 0.0020953356251993682, "clip_ratio/low_mean": 0.0022351800471369643, "clip_ratio/low_min": 0.00011645823542494327, "clip_ratio/region_mean": 0.004330515774199739, "epoch": 2.6345873432487608, "grad_norm": 0.1455148607492447, "learning_rate": 1e-06, "loss": 0.0014, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 766.8638916015625, "completions/mean_terminated_length": 536.4463500976562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.636920384951881, "grad_norm": 0.34519731998443604, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 165093104.0, "reward": 0.6618303656578064, "reward_std": 0.17356063425540924, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1129 }, { "clip_ratio/high_max": 0.003344809461850673, "clip_ratio/high_mean": 0.0014446555214817636, "clip_ratio/low_mean": 0.0011635191840468906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002608174632769078, "epoch": 2.6392534266550016, "grad_norm": 0.2136227935552597, "learning_rate": 1e-06, "loss": -0.023, "step": 1130 }, { "clip_ratio/high_max": 0.004392346090753563, "clip_ratio/high_mean": 0.0019184709672117606, "clip_ratio/low_mean": 0.001764910397469066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036833814083365723, "epoch": 2.641586468358122, "grad_norm": 0.15250639617443085, "learning_rate": 1e-06, "loss": -0.0234, "step": 1131 }, { "clip_ratio/high_max": 0.005124148825416341, "clip_ratio/high_mean": 0.0023066592548275366, "clip_ratio/low_mean": 0.002405812541837804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004712471825769171, "epoch": 2.6439195100612425, "grad_norm": 0.12942498922348022, "learning_rate": 1e-06, "loss": -0.0237, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 676.0736694335938, "completions/mean_terminated_length": 537.05224609375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.6462525517643627, "grad_norm": 0.35268962383270264, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 165650762.0, "reward": 0.6662946939468384, "reward_std": 0.19700364768505096, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1133 }, { "clip_ratio/high_max": 0.0037898902883171104, "clip_ratio/high_mean": 0.0015644764716853388, "clip_ratio/low_mean": 0.001151175398263149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002715651884500403, "epoch": 2.6485855934674833, "grad_norm": 0.21668580174446106, "learning_rate": 1e-06, "loss": -0.0272, "step": 1134 }, { "clip_ratio/high_max": 0.0048405375418951735, "clip_ratio/high_mean": 0.0020050459424965084, "clip_ratio/low_mean": 0.0017173725900647696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037224185653030872, "epoch": 2.6509186351706036, "grad_norm": 0.1551775485277176, "learning_rate": 1e-06, "loss": -0.0276, "step": 1135 }, { "clip_ratio/high_max": 0.005666614568326622, "clip_ratio/high_mean": 0.002380868900218047, "clip_ratio/low_mean": 0.002351787807128858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004732656758278608, "epoch": 2.653251676873724, "grad_norm": 0.12676647305488586, "learning_rate": 1e-06, "loss": -0.0278, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 797.9553833007812, "completions/mean_terminated_length": 578.0857543945312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.6555847185768444, "grad_norm": 0.27810534834861755, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 166225922.0, "reward": 0.5926339626312256, "reward_std": 0.14301365613937378, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 1137 }, { "clip_ratio/high_max": 0.0031149539790931158, "clip_ratio/high_mean": 0.001137869377998868, "clip_ratio/low_mean": 0.0007708802768320311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001908749676658772, "epoch": 2.657917760279965, "grad_norm": 0.21173426508903503, "learning_rate": 1e-06, "loss": -0.0417, "step": 1138 }, { "clip_ratio/high_max": 0.00409403168305289, "clip_ratio/high_mean": 0.0014394582394743338, "clip_ratio/low_mean": 0.0010967580310534686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002536216299631633, "epoch": 2.6602508019830857, "grad_norm": 0.1324605643749237, "learning_rate": 1e-06, "loss": -0.0419, "step": 1139 }, { "clip_ratio/high_max": 0.00473773418343626, "clip_ratio/high_mean": 0.0017663060789345764, "clip_ratio/low_mean": 0.0014356294959725346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032019355421653017, "epoch": 2.662583843686206, "grad_norm": 0.10866788774728775, "learning_rate": 1e-06, "loss": -0.0421, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 918.3560791015625, "completions/mean_terminated_length": 619.6032104492188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.664916885389326, "grad_norm": 0.3212529420852661, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 166822337.0, "reward": 0.4732142984867096, "reward_std": 0.20121163129806519, "rewards/verify_math_reward/mean": 0.4732142984867096, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1141 }, { "clip_ratio/high_max": 0.003228579917049501, "clip_ratio/high_mean": 0.0013575190041592577, "clip_ratio/low_mean": 0.0012270740226085763, "clip_ratio/low_min": 8.709044232091401e-05, "clip_ratio/region_mean": 0.0025845930649666116, "epoch": 2.667249927092447, "grad_norm": 0.2410682886838913, "learning_rate": 1e-06, "loss": -0.055, "step": 1142 }, { "clip_ratio/high_max": 0.00422186337527819, "clip_ratio/high_mean": 0.0017241819332411978, "clip_ratio/low_mean": 0.0019226440890633967, "clip_ratio/low_min": 9.304987543146126e-05, "clip_ratio/region_mean": 0.0036468261532718316, "epoch": 2.6695829687955674, "grad_norm": 0.16022157669067383, "learning_rate": 1e-06, "loss": -0.0555, "step": 1143 }, { "clip_ratio/high_max": 0.0054074300423962995, "clip_ratio/high_mean": 0.002106369134708075, "clip_ratio/low_mean": 0.002589252828329336, "clip_ratio/low_min": 0.0001919782953336835, "clip_ratio/region_mean": 0.004695621886639856, "epoch": 2.6719160104986877, "grad_norm": 0.12964707612991333, "learning_rate": 1e-06, "loss": -0.0558, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2028.0, "completions/mean_length": 889.3895263671875, "completions/mean_terminated_length": 587.913330078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.674249052201808, "grad_norm": 0.3604341149330139, "learning_rate": 1e-06, "loss": -0.0231, "num_tokens": 167398454.0, "reward": 0.5145089626312256, "reward_std": 0.17757722735404968, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 1145 }, { "clip_ratio/high_max": 0.003224997235520277, "clip_ratio/high_mean": 0.0011867033663293114, "clip_ratio/low_mean": 0.0012634524209715892, "clip_ratio/low_min": 1.2972187505511101e-05, "clip_ratio/region_mean": 0.0024501557782059535, "epoch": 2.6765820939049285, "grad_norm": 0.2449527084827423, "learning_rate": 1e-06, "loss": -0.0232, "step": 1146 }, { "clip_ratio/high_max": 0.0040107801323756576, "clip_ratio/high_mean": 0.0015393011181004113, "clip_ratio/low_mean": 0.0017733677123032976, "clip_ratio/low_min": 3.530558115016902e-05, "clip_ratio/region_mean": 0.003312668872240465, "epoch": 2.678915135608049, "grad_norm": 0.14445987343788147, "learning_rate": 1e-06, "loss": -0.0236, "step": 1147 }, { "clip_ratio/high_max": 0.005097054978250526, "clip_ratio/high_mean": 0.0018765503191389143, "clip_ratio/low_mean": 0.0023821186186978593, "clip_ratio/low_min": 9.00114064279478e-05, "clip_ratio/region_mean": 0.004258669068804011, "epoch": 2.6812481773111694, "grad_norm": 0.12173958122730255, "learning_rate": 1e-06, "loss": -0.0238, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 800.1004638671875, "completions/mean_terminated_length": 571.9833374023438, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.68358121901429, "grad_norm": 0.34717878699302673, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 167962624.0, "reward": 0.5926339626312256, "reward_std": 0.17536230385303497, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 1149 }, { "clip_ratio/high_max": 0.0032167174722417258, "clip_ratio/high_mean": 0.001167235772300046, "clip_ratio/low_mean": 0.0011169809004059061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022842166799819097, "epoch": 2.6859142607174102, "grad_norm": 0.5955332517623901, "learning_rate": 1e-06, "loss": -0.0172, "step": 1150 }, { "clip_ratio/high_max": 0.004058792437717784, "clip_ratio/high_mean": 0.0015518672898906516, "clip_ratio/low_mean": 0.001982474379474297, "clip_ratio/low_min": 3.9901842683320865e-05, "clip_ratio/region_mean": 0.00353434169664979, "epoch": 2.688247302420531, "grad_norm": 0.15985262393951416, "learning_rate": 1e-06, "loss": -0.0177, "step": 1151 }, { "clip_ratio/high_max": 0.004946439490595367, "clip_ratio/high_mean": 0.001853895555541385, "clip_ratio/low_mean": 0.0026429085337440483, "clip_ratio/low_min": 8.997194163384847e-05, "clip_ratio/region_mean": 0.004496804074733518, "epoch": 2.690580344123651, "grad_norm": 0.1263619214296341, "learning_rate": 1e-06, "loss": -0.018, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 853.8471069335938, "completions/mean_terminated_length": 570.5521850585938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.6929133858267718, "grad_norm": 0.3227909803390503, "learning_rate": 1e-06, "loss": -0.0347, "num_tokens": 168517047.0, "reward": 0.5256696939468384, "reward_std": 0.17341092228889465, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 1153 }, { "clip_ratio/high_max": 0.0030781754248891957, "clip_ratio/high_mean": 0.0010886035270232242, "clip_ratio/low_mean": 0.0010505291147637763, "clip_ratio/low_min": 4.4661287574854214e-05, "clip_ratio/region_mean": 0.002139132615411654, "epoch": 2.695246427529892, "grad_norm": 0.24289044737815857, "learning_rate": 1e-06, "loss": -0.0348, "step": 1154 }, { "clip_ratio/high_max": 0.004091189111932181, "clip_ratio/high_mean": 0.0014565861165465321, "clip_ratio/low_mean": 0.0016115867292683106, "clip_ratio/low_min": 0.00010570655649644323, "clip_ratio/region_mean": 0.003068172882194631, "epoch": 2.6975794692330126, "grad_norm": 0.14242219924926758, "learning_rate": 1e-06, "loss": -0.0351, "step": 1155 }, { "clip_ratio/high_max": 0.0048147055495064706, "clip_ratio/high_mean": 0.0017542720052006189, "clip_ratio/low_mean": 0.002179104536480736, "clip_ratio/low_min": 0.00016062226131907664, "clip_ratio/region_mean": 0.003933376588975079, "epoch": 2.699912510936133, "grad_norm": 0.11657572537660599, "learning_rate": 1e-06, "loss": -0.0353, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 793.5011596679688, "completions/mean_terminated_length": 547.9916381835938, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 2.7022455526392535, "grad_norm": 0.34469184279441833, "learning_rate": 1e-06, "loss": -0.0157, "num_tokens": 169061520.0, "reward": 0.6462053656578064, "reward_std": 0.1737472116947174, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1157 }, { "clip_ratio/high_max": 0.0034827227354981005, "clip_ratio/high_mean": 0.0012823303695768118, "clip_ratio/low_mean": 0.0011255464232817758, "clip_ratio/low_min": 9.311780195275787e-05, "clip_ratio/region_mean": 0.002407876731012948, "epoch": 2.704578594342374, "grad_norm": 0.21074877679347992, "learning_rate": 1e-06, "loss": -0.0159, "step": 1158 }, { "clip_ratio/high_max": 0.00458967886515893, "clip_ratio/high_mean": 0.0017441444397263695, "clip_ratio/low_mean": 0.001757547852321295, "clip_ratio/low_min": 0.00020897329159197398, "clip_ratio/region_mean": 0.00350169233570341, "epoch": 2.7069116360454943, "grad_norm": 0.16888529062271118, "learning_rate": 1e-06, "loss": -0.0162, "step": 1159 }, { "clip_ratio/high_max": 0.005260829595499672, "clip_ratio/high_mean": 0.0020864128309767693, "clip_ratio/low_mean": 0.0023281337962544058, "clip_ratio/low_min": 0.00025683197600301355, "clip_ratio/region_mean": 0.004414546594489366, "epoch": 2.7092446777486145, "grad_norm": 0.1280195415019989, "learning_rate": 1e-06, "loss": -0.0165, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 780.2645263671875, "completions/mean_terminated_length": 580.1431884765625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.711577719451735, "grad_norm": 0.33142831921577454, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 169642037.0, "reward": 0.6004464626312256, "reward_std": 0.14278794825077057, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1161 }, { "clip_ratio/high_max": 0.002607446411275305, "clip_ratio/high_mean": 0.0010025420451711398, "clip_ratio/low_mean": 0.0010818620557984104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020844041064265184, "epoch": 2.713910761154856, "grad_norm": 0.18777261674404144, "learning_rate": 1e-06, "loss": -0.0207, "step": 1162 }, { "clip_ratio/high_max": 0.0033877383393701166, "clip_ratio/high_mean": 0.0013564015462179668, "clip_ratio/low_mean": 0.0014375339942489518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00279393550590612, "epoch": 2.716243802857976, "grad_norm": 0.13843920826911926, "learning_rate": 1e-06, "loss": -0.021, "step": 1163 }, { "clip_ratio/high_max": 0.004221174094709568, "clip_ratio/high_mean": 0.0016554303692828398, "clip_ratio/low_mean": 0.001967411757505033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003622841992182657, "epoch": 2.7185768445610963, "grad_norm": 0.10962489992380142, "learning_rate": 1e-06, "loss": -0.0212, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 937.021240234375, "completions/mean_terminated_length": 601.6234741210938, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.720909886264217, "grad_norm": 0.3317437767982483, "learning_rate": 1e-06, "loss": -0.0094, "num_tokens": 170231720.0, "reward": 0.551339328289032, "reward_std": 0.15495839715003967, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1165 }, { "clip_ratio/high_max": 0.0034817729756468907, "clip_ratio/high_mean": 0.0012487000713008456, "clip_ratio/low_mean": 0.0009951036463462515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002243803686724277, "epoch": 2.7232429279673376, "grad_norm": 0.22728894650936127, "learning_rate": 1e-06, "loss": -0.0095, "step": 1166 }, { "clip_ratio/high_max": 0.004814259489648975, "clip_ratio/high_mean": 0.001722113745927345, "clip_ratio/low_mean": 0.0014378605264937505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031599742942489684, "epoch": 2.725575969670458, "grad_norm": 0.15128082036972046, "learning_rate": 1e-06, "loss": -0.0099, "step": 1167 }, { "clip_ratio/high_max": 0.005680880538420752, "clip_ratio/high_mean": 0.002098101485898951, "clip_ratio/low_mean": 0.0018972814577864483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039953829400474206, "epoch": 2.7279090113735784, "grad_norm": 0.11779941618442535, "learning_rate": 1e-06, "loss": -0.0101, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 852.5335083007812, "completions/mean_terminated_length": 632.1787719726562, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.7302420530766986, "grad_norm": 0.2875055968761444, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 170853078.0, "reward": 0.520089328289032, "reward_std": 0.17479775846004486, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1169 }, { "clip_ratio/high_max": 0.0029533146880567074, "clip_ratio/high_mean": 0.001079723057046067, "clip_ratio/low_mean": 0.0010504335950827226, "clip_ratio/low_min": 0.00011824368266388774, "clip_ratio/region_mean": 0.002130156659404747, "epoch": 2.7325750947798193, "grad_norm": 0.18695271015167236, "learning_rate": 1e-06, "loss": -0.0295, "step": 1170 }, { "clip_ratio/high_max": 0.003755842801183462, "clip_ratio/high_mean": 0.0014002461139170919, "clip_ratio/low_mean": 0.0014619142148148967, "clip_ratio/low_min": 0.00015890673967078328, "clip_ratio/region_mean": 0.002862160326912999, "epoch": 2.7349081364829395, "grad_norm": 0.13739876449108124, "learning_rate": 1e-06, "loss": -0.0298, "step": 1171 }, { "clip_ratio/high_max": 0.004559639768558554, "clip_ratio/high_mean": 0.001678780885413289, "clip_ratio/low_mean": 0.001984005906706443, "clip_ratio/low_min": 0.0002068737376248464, "clip_ratio/region_mean": 0.00366278673755005, "epoch": 2.73724117818606, "grad_norm": 0.10813868045806885, "learning_rate": 1e-06, "loss": -0.03, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 938.75341796875, "completions/mean_terminated_length": 590.556396484375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.7395742198891804, "grad_norm": 0.627103865146637, "learning_rate": 1e-06, "loss": -0.0132, "num_tokens": 171430489.0, "reward": 0.5189732313156128, "reward_std": 0.16360372304916382, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1173 }, { "clip_ratio/high_max": 0.004270101031579543, "clip_ratio/high_mean": 0.0013738352972723078, "clip_ratio/low_mean": 0.0014192380076565314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002793073253997136, "epoch": 2.741907261592301, "grad_norm": 0.2866986393928528, "learning_rate": 1e-06, "loss": 29.6456, "step": 1174 }, { "clip_ratio/high_max": 0.005313769375788979, "clip_ratio/high_mean": 0.0016816987736092415, "clip_ratio/low_mean": 0.0020912761428917293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037729749747086316, "epoch": 2.7442403032954212, "grad_norm": 0.159773051738739, "learning_rate": 1e-06, "loss": 29.6452, "step": 1175 }, { "clip_ratio/high_max": 0.00613604616955854, "clip_ratio/high_mean": 0.0020067754230694845, "clip_ratio/low_mean": 0.002807909098919481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004814684492885135, "epoch": 2.746573344998542, "grad_norm": 0.12992709875106812, "learning_rate": 1e-06, "loss": 29.6449, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 716.8594360351562, "completions/mean_terminated_length": 554.8187255859375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.7489063867016625, "grad_norm": 0.31859350204467773, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 171990779.0, "reward": 0.6194196939468384, "reward_std": 0.18318095803260803, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1177 }, { "clip_ratio/high_max": 0.003521402941260021, "clip_ratio/high_mean": 0.0013140234659658745, "clip_ratio/low_mean": 0.0011080226449848851, "clip_ratio/low_min": 0.00012063859321642667, "clip_ratio/region_mean": 0.0024220461273216642, "epoch": 2.7512394284047827, "grad_norm": 0.2782540023326874, "learning_rate": 1e-06, "loss": -0.0058, "step": 1178 }, { "clip_ratio/high_max": 0.004800724011147395, "clip_ratio/high_mean": 0.0017899968697747681, "clip_ratio/low_mean": 0.0016856752336025238, "clip_ratio/low_min": 0.00011926587194466265, "clip_ratio/region_mean": 0.0034756721070152707, "epoch": 2.753572470107903, "grad_norm": 0.1749497652053833, "learning_rate": 1e-06, "loss": -0.0062, "step": 1179 }, { "clip_ratio/high_max": 0.005828393128467724, "clip_ratio/high_mean": 0.0021071992232464254, "clip_ratio/low_mean": 0.0023443235404556617, "clip_ratio/low_min": 0.00023146539751905948, "clip_ratio/region_mean": 0.004451522880117409, "epoch": 2.7559055118110236, "grad_norm": 0.12605196237564087, "learning_rate": 1e-06, "loss": -0.0065, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 866.8939819335938, "completions/mean_terminated_length": 593.240966796875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 2.7582385535141443, "grad_norm": 0.33375027775764465, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 172569460.0, "reward": 0.5502232313156128, "reward_std": 0.16370996832847595, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 1181 }, { "clip_ratio/high_max": 0.0028466511648730375, "clip_ratio/high_mean": 0.001174334222014295, "clip_ratio/low_mean": 0.00109425500886573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022685892618028447, "epoch": 2.7605715952172645, "grad_norm": 0.22197236120700836, "learning_rate": 1e-06, "loss": -0.0182, "step": 1182 }, { "clip_ratio/high_max": 0.0035652700244099833, "clip_ratio/high_mean": 0.0015172383755270857, "clip_ratio/low_mean": 0.0016169079790415708, "clip_ratio/low_min": 2.9009050194872543e-05, "clip_ratio/region_mean": 0.0031341463909484446, "epoch": 2.7629046369203847, "grad_norm": 0.15755388140678406, "learning_rate": 1e-06, "loss": -0.0186, "step": 1183 }, { "clip_ratio/high_max": 0.0046166229658410884, "clip_ratio/high_mean": 0.0019251614357926883, "clip_ratio/low_mean": 0.00229043441140675, "clip_ratio/low_min": 0.00010153168113902211, "clip_ratio/region_mean": 0.004215595778077841, "epoch": 2.7652376786235053, "grad_norm": 0.12124405801296234, "learning_rate": 1e-06, "loss": -0.0188, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 875.8549194335938, "completions/mean_terminated_length": 573.1062622070312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.767570720326626, "grad_norm": 0.38679590821266174, "learning_rate": 1e-06, "loss": -0.0337, "num_tokens": 173127122.0, "reward": 0.5290178656578064, "reward_std": 0.17171983420848846, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 1185 }, { "clip_ratio/high_max": 0.0038599047111347318, "clip_ratio/high_mean": 0.0013876525990781374, "clip_ratio/low_mean": 0.0011978357051702915, "clip_ratio/low_min": 5.602031342277769e-05, "clip_ratio/region_mean": 0.002585488313343376, "epoch": 2.769903762029746, "grad_norm": 0.21823209524154663, "learning_rate": 1e-06, "loss": -0.0339, "step": 1186 }, { "clip_ratio/high_max": 0.005497384292539209, "clip_ratio/high_mean": 0.0019723278455785476, "clip_ratio/low_mean": 0.0017662692589510698, "clip_ratio/low_min": 6.899165418872144e-05, "clip_ratio/region_mean": 0.0037385971227195114, "epoch": 2.772236803732867, "grad_norm": 0.15762393176555634, "learning_rate": 1e-06, "loss": -0.0343, "step": 1187 }, { "clip_ratio/high_max": 0.006500968491309322, "clip_ratio/high_mean": 0.002290677795826923, "clip_ratio/low_mean": 0.002335721714189276, "clip_ratio/low_min": 9.181525456369855e-05, "clip_ratio/region_mean": 0.004626399677363224, "epoch": 2.774569845435987, "grad_norm": 0.12866410613059998, "learning_rate": 1e-06, "loss": -0.0345, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3161.0, "completions/mean_length": 831.8750610351562, "completions/mean_terminated_length": 580.7885131835938, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.7769028871391077, "grad_norm": 0.3046405017375946, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 173702610.0, "reward": 0.5424107313156128, "reward_std": 0.1522563099861145, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1189 }, { "clip_ratio/high_max": 0.0032145067598321475, "clip_ratio/high_mean": 0.0011382217671780381, "clip_ratio/low_mean": 0.0011451777136244345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022833995026303455, "epoch": 2.779235928842228, "grad_norm": 0.22096525132656097, "learning_rate": 1e-06, "loss": -0.0208, "step": 1190 }, { "clip_ratio/high_max": 0.0043055450296378694, "clip_ratio/high_mean": 0.0014961833476263564, "clip_ratio/low_mean": 0.0016877311791176908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031839145231060684, "epoch": 2.7815689705453486, "grad_norm": 0.13466063141822815, "learning_rate": 1e-06, "loss": -0.0211, "step": 1191 }, { "clip_ratio/high_max": 0.005489215414854698, "clip_ratio/high_mean": 0.0018422853645461146, "clip_ratio/low_mean": 0.002158605922886636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004000891261966899, "epoch": 2.783902012248469, "grad_norm": 0.11774160712957382, "learning_rate": 1e-06, "loss": -0.0213, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2311.0, "completions/mean_length": 850.1730346679688, "completions/mean_terminated_length": 562.2685546875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.7862350539515894, "grad_norm": 0.33023250102996826, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 174253189.0, "reward": 0.5390625, "reward_std": 0.19114413857460022, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1193 }, { "clip_ratio/high_max": 0.004307364346459508, "clip_ratio/high_mean": 0.0014116505735728424, "clip_ratio/low_mean": 0.0010770547323772917, "clip_ratio/low_min": 3.268435102654621e-05, "clip_ratio/region_mean": 0.0024887053477868903, "epoch": 2.7885680956547096, "grad_norm": 0.21808961033821106, "learning_rate": 1e-06, "loss": -0.0219, "step": 1194 }, { "clip_ratio/high_max": 0.005147901174495928, "clip_ratio/high_mean": 0.0018716721242526546, "clip_ratio/low_mean": 0.0016479005680594128, "clip_ratio/low_min": 0.00012350621545920148, "clip_ratio/region_mean": 0.0035195727250538766, "epoch": 2.7909011373578303, "grad_norm": 0.1431431621313095, "learning_rate": 1e-06, "loss": -0.0222, "step": 1195 }, { "clip_ratio/high_max": 0.00657718391448725, "clip_ratio/high_mean": 0.002264056067360798, "clip_ratio/low_mean": 0.00221359860006487, "clip_ratio/low_min": 0.00016589872393524274, "clip_ratio/region_mean": 0.004477654663787689, "epoch": 2.793234179060951, "grad_norm": 0.11956056207418442, "learning_rate": 1e-06, "loss": -0.0224, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3444.0, "completions/mean_length": 848.1004638671875, "completions/mean_terminated_length": 598.2620239257812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.795567220764071, "grad_norm": 0.30866947770118713, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 174834903.0, "reward": 0.5959821939468384, "reward_std": 0.14553098380565643, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1197 }, { "clip_ratio/high_max": 0.0035712522294488735, "clip_ratio/high_mean": 0.001188511600048514, "clip_ratio/low_mean": 0.0009317527346865973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021202643183642067, "epoch": 2.7979002624671914, "grad_norm": 0.2478400468826294, "learning_rate": 1e-06, "loss": -0.0353, "step": 1198 }, { "clip_ratio/high_max": 0.004651036484574433, "clip_ratio/high_mean": 0.0015722573407401796, "clip_ratio/low_mean": 0.0014255966161726974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002997853996930644, "epoch": 2.800233304170312, "grad_norm": 0.14276562631130219, "learning_rate": 1e-06, "loss": -0.0357, "step": 1199 }, { "clip_ratio/high_max": 0.005601046825177036, "clip_ratio/high_mean": 0.0019657792181533296, "clip_ratio/low_mean": 0.0019666296611831058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003932408886612393, "epoch": 2.8025663458734327, "grad_norm": 0.11162565648555756, "learning_rate": 1e-06, "loss": -0.0359, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 828.4397583007812, "completions/mean_terminated_length": 568.6096801757812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.804899387576553, "grad_norm": 0.33346137404441833, "learning_rate": 1e-06, "loss": -0.0226, "num_tokens": 175387497.0, "reward": 0.5792410969734192, "reward_std": 0.16284668445587158, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1201 }, { "clip_ratio/high_max": 0.002922565152402967, "clip_ratio/high_mean": 0.001182207775855204, "clip_ratio/low_mean": 0.0010027703137893695, "clip_ratio/low_min": 4.0033747609413695e-05, "clip_ratio/region_mean": 0.002184978104196489, "epoch": 2.8072324292796735, "grad_norm": 0.22856587171554565, "learning_rate": 1e-06, "loss": -0.0228, "step": 1202 }, { "clip_ratio/high_max": 0.004096301905519795, "clip_ratio/high_mean": 0.0016682941095496062, "clip_ratio/low_mean": 0.0014479478559223935, "clip_ratio/low_min": 0.00010245711928291712, "clip_ratio/region_mean": 0.0031162420054897666, "epoch": 2.8095654709827937, "grad_norm": 0.1454327404499054, "learning_rate": 1e-06, "loss": -0.0231, "step": 1203 }, { "clip_ratio/high_max": 0.00488254711672198, "clip_ratio/high_mean": 0.0019777269117184915, "clip_ratio/low_mean": 0.0019363761857675854, "clip_ratio/low_min": 0.00022602033641305752, "clip_ratio/region_mean": 0.003914103173883632, "epoch": 2.8118985126859144, "grad_norm": 0.11684436351060867, "learning_rate": 1e-06, "loss": -0.0233, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3372.0, "completions/mean_length": 894.77685546875, "completions/mean_terminated_length": 576.618408203125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.8142315543890346, "grad_norm": 0.3191275894641876, "learning_rate": 1e-06, "loss": -0.0392, "num_tokens": 175941129.0, "reward": 0.5892857313156128, "reward_std": 0.14564228057861328, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1205 }, { "clip_ratio/high_max": 0.003430502925766632, "clip_ratio/high_mean": 0.0012290641170693561, "clip_ratio/low_mean": 0.0008186477771232603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002047711859631818, "epoch": 2.8165645960921553, "grad_norm": 0.18834351003170013, "learning_rate": 1e-06, "loss": -0.0394, "step": 1206 }, { "clip_ratio/high_max": 0.004212196348817088, "clip_ratio/high_mean": 0.0014970942102081608, "clip_ratio/low_mean": 0.0012327062177064363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027298004570184276, "epoch": 2.8188976377952755, "grad_norm": 0.13084357976913452, "learning_rate": 1e-06, "loss": -0.0397, "step": 1207 }, { "clip_ratio/high_max": 0.005360584822483361, "clip_ratio/high_mean": 0.0018231360700156074, "clip_ratio/low_mean": 0.0016369832264899742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034601193328853697, "epoch": 2.821230679498396, "grad_norm": 0.10555332899093628, "learning_rate": 1e-06, "loss": -0.0399, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 870.552490234375, "completions/mean_terminated_length": 571.6085205078125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.8235637212015163, "grad_norm": 0.3249732255935669, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 176499712.0, "reward": 0.5457589626312256, "reward_std": 0.18080884218215942, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1209 }, { "clip_ratio/high_max": 0.002904116438003257, "clip_ratio/high_mean": 0.0012518669609562494, "clip_ratio/low_mean": 0.001037543841448496, "clip_ratio/low_min": 4.199563409201801e-05, "clip_ratio/region_mean": 0.002289410862431396, "epoch": 2.825896762904637, "grad_norm": 0.4041960835456848, "learning_rate": 1e-06, "loss": -0.056, "step": 1210 }, { "clip_ratio/high_max": 0.004392165952594951, "clip_ratio/high_mean": 0.0017412040069757495, "clip_ratio/low_mean": 0.0015392326713481452, "clip_ratio/low_min": 8.402039020438679e-05, "clip_ratio/region_mean": 0.003280436612840276, "epoch": 2.8282298046077576, "grad_norm": 0.17041875422000885, "learning_rate": 1e-06, "loss": -0.0564, "step": 1211 }, { "clip_ratio/high_max": 0.005009632703149691, "clip_ratio/high_mean": 0.0019915685952582862, "clip_ratio/low_mean": 0.00212219529930735, "clip_ratio/low_min": 0.00013827615475747734, "clip_ratio/region_mean": 0.004113763905479573, "epoch": 2.830562846310878, "grad_norm": 0.12721320986747742, "learning_rate": 1e-06, "loss": -0.0566, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 806.4955444335938, "completions/mean_terminated_length": 557.70947265625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.832895888013998, "grad_norm": 0.3654422163963318, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 177055308.0, "reward": 0.6171875, "reward_std": 0.17761960625648499, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1213 }, { "clip_ratio/high_max": 0.003274388931458816, "clip_ratio/high_mean": 0.0013130276893207338, "clip_ratio/low_mean": 0.001186658329970669, "clip_ratio/low_min": 4.809542224393226e-05, "clip_ratio/region_mean": 0.002499686073861085, "epoch": 2.8352289297171187, "grad_norm": 0.2418983429670334, "learning_rate": 1e-06, "loss": -0.041, "step": 1214 }, { "clip_ratio/high_max": 0.0042960855207638815, "clip_ratio/high_mean": 0.0016887207311810926, "clip_ratio/low_mean": 0.0017972128807741683, "clip_ratio/low_min": 6.291933823376894e-05, "clip_ratio/region_mean": 0.00348593364469707, "epoch": 2.8375619714202394, "grad_norm": 0.17186492681503296, "learning_rate": 1e-06, "loss": -0.0414, "step": 1215 }, { "clip_ratio/high_max": 0.00540234346408397, "clip_ratio/high_mean": 0.0021418517208076082, "clip_ratio/low_mean": 0.002448981125780847, "clip_ratio/low_min": 7.962833478813991e-05, "clip_ratio/region_mean": 0.00459083286114037, "epoch": 2.8398950131233596, "grad_norm": 0.1269465535879135, "learning_rate": 1e-06, "loss": -0.0417, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3583.0, "completions/mean_length": 840.4141235351562, "completions/mean_terminated_length": 547.3320922851562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.8422280548264798, "grad_norm": 0.31373047828674316, "learning_rate": 1e-06, "loss": -0.0281, "num_tokens": 177604479.0, "reward": 0.574776828289032, "reward_std": 0.15131311118602753, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1217 }, { "clip_ratio/high_max": 0.0026574548464850523, "clip_ratio/high_mean": 0.001023850705678342, "clip_ratio/low_mean": 0.0010296625277987914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020535132207442075, "epoch": 2.8445610965296004, "grad_norm": 0.20707808434963226, "learning_rate": 1e-06, "loss": -0.0283, "step": 1218 }, { "clip_ratio/high_max": 0.003576529459678568, "clip_ratio/high_mean": 0.001390154538967181, "clip_ratio/low_mean": 0.0015535155107500032, "clip_ratio/low_min": 3.246753112762235e-05, "clip_ratio/region_mean": 0.0029436701152008027, "epoch": 2.846894138232721, "grad_norm": 0.14434011280536652, "learning_rate": 1e-06, "loss": -0.0286, "step": 1219 }, { "clip_ratio/high_max": 0.0044757898431271315, "clip_ratio/high_mean": 0.0016796569507278036, "clip_ratio/low_mean": 0.002029388164373813, "clip_ratio/low_min": 4.870129851042293e-05, "clip_ratio/region_mean": 0.0037090451369294897, "epoch": 2.8492271799358413, "grad_norm": 0.28782254457473755, "learning_rate": 1e-06, "loss": -0.0288, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 788.9542846679688, "completions/mean_terminated_length": 517.361083984375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 2.851560221638962, "grad_norm": 0.3412647247314453, "learning_rate": 1e-06, "loss": -0.0261, "num_tokens": 178115990.0, "reward": 0.5457589626312256, "reward_std": 0.15087056159973145, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1221 }, { "clip_ratio/high_max": 0.003054790126043372, "clip_ratio/high_mean": 0.0010999043806805275, "clip_ratio/low_mean": 0.0011979233859165106, "clip_ratio/low_min": 7.50737490307074e-05, "clip_ratio/region_mean": 0.002297827748407144, "epoch": 2.853893263342082, "grad_norm": 0.20852363109588623, "learning_rate": 1e-06, "loss": -0.0263, "step": 1222 }, { "clip_ratio/high_max": 0.004085107510036323, "clip_ratio/high_mean": 0.0014740781480213627, "clip_ratio/low_mean": 0.001798942786990665, "clip_ratio/low_min": 9.693205902294721e-05, "clip_ratio/region_mean": 0.003273020833148621, "epoch": 2.856226305045203, "grad_norm": 0.15250858664512634, "learning_rate": 1e-06, "loss": -0.0267, "step": 1223 }, { "clip_ratio/high_max": 0.004963603525538929, "clip_ratio/high_mean": 0.001850283719250001, "clip_ratio/low_mean": 0.002487012359779328, "clip_ratio/low_min": 0.00016751232942624483, "clip_ratio/region_mean": 0.004337296093581244, "epoch": 2.858559346748323, "grad_norm": 0.12383803725242615, "learning_rate": 1e-06, "loss": -0.0269, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 854.1607666015625, "completions/mean_terminated_length": 531.9656372070312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.8608923884514437, "grad_norm": 0.38747724890708923, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 178633102.0, "reward": 0.578125, "reward_std": 0.16450342535972595, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1225 }, { "clip_ratio/high_max": 0.002743330245721154, "clip_ratio/high_mean": 0.00120337537737214, "clip_ratio/low_mean": 0.001138832616561558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002342208012123592, "epoch": 2.863225430154564, "grad_norm": 0.1993362009525299, "learning_rate": 1e-06, "loss": -0.0106, "step": 1226 }, { "clip_ratio/high_max": 0.003492295028991066, "clip_ratio/high_mean": 0.0015060505247674882, "clip_ratio/low_mean": 0.0017660723424341995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003272122878115624, "epoch": 2.8655584718576845, "grad_norm": 0.13748352229595184, "learning_rate": 1e-06, "loss": -0.011, "step": 1227 }, { "clip_ratio/high_max": 0.0043676336063072085, "clip_ratio/high_mean": 0.001975311621208675, "clip_ratio/low_mean": 0.0023873010504757985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004362612598924898, "epoch": 2.8678915135608047, "grad_norm": 0.10920245200395584, "learning_rate": 1e-06, "loss": -0.0112, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 800.364990234375, "completions/mean_terminated_length": 534.0108642578125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.8702245552639254, "grad_norm": 0.38515955209732056, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 179162605.0, "reward": 0.5892857313156128, "reward_std": 0.1743106245994568, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1229 }, { "clip_ratio/high_max": 0.0036710926797240973, "clip_ratio/high_mean": 0.001344829761364963, "clip_ratio/low_mean": 0.0011574461968848482, "clip_ratio/low_min": 0.00011933614950976335, "clip_ratio/region_mean": 0.0025022759873536415, "epoch": 2.872557596967046, "grad_norm": 0.35476380586624146, "learning_rate": 1e-06, "loss": -0.0078, "step": 1230 }, { "clip_ratio/high_max": 0.00457541570358444, "clip_ratio/high_mean": 0.0017016249003063422, "clip_ratio/low_mean": 0.0017903371408465318, "clip_ratio/low_min": 0.00020572220819303766, "clip_ratio/region_mean": 0.003491962037514895, "epoch": 2.8748906386701663, "grad_norm": 0.1762520968914032, "learning_rate": 1e-06, "loss": -0.0081, "step": 1231 }, { "clip_ratio/high_max": 0.005889964711968787, "clip_ratio/high_mean": 0.0021195968001848087, "clip_ratio/low_mean": 0.0023974362775334157, "clip_ratio/low_min": 0.00020317067537689582, "clip_ratio/region_mean": 0.004517033026786521, "epoch": 2.8772236803732865, "grad_norm": 0.12495679408311844, "learning_rate": 1e-06, "loss": -0.0084, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 814.349365234375, "completions/mean_terminated_length": 549.12548828125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 2.879556722076407, "grad_norm": 0.3414965569972992, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 179704966.0, "reward": 0.5658482313156128, "reward_std": 0.15766116976737976, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 1233 }, { "clip_ratio/high_max": 0.002792266182950698, "clip_ratio/high_mean": 0.0011591993097681552, "clip_ratio/low_mean": 0.0011400887306081131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002299288011272438, "epoch": 2.8818897637795278, "grad_norm": 0.20809409022331238, "learning_rate": 1e-06, "loss": 0.0066, "step": 1234 }, { "clip_ratio/high_max": 0.003915795103239361, "clip_ratio/high_mean": 0.0016657360683893785, "clip_ratio/low_mean": 0.0017942636404768564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034599997306941077, "epoch": 2.884222805482648, "grad_norm": 0.1493251621723175, "learning_rate": 1e-06, "loss": 0.0062, "step": 1235 }, { "clip_ratio/high_max": 0.004723271536931861, "clip_ratio/high_mean": 0.002001582946832059, "clip_ratio/low_mean": 0.0024071769148577005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004408759923535399, "epoch": 2.886555847185768, "grad_norm": 0.11740856617689133, "learning_rate": 1e-06, "loss": 0.006, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 830.1049194335938, "completions/mean_terminated_length": 604.064453125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.888888888888889, "grad_norm": 0.3220398724079132, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 180296508.0, "reward": 0.546875, "reward_std": 0.19948776066303253, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 1237 }, { "clip_ratio/high_max": 0.003892336484568659, "clip_ratio/high_mean": 0.001342737445156672, "clip_ratio/low_mean": 0.0013019050893490203, "clip_ratio/low_min": 2.884171590267215e-05, "clip_ratio/region_mean": 0.002644642532686703, "epoch": 2.8912219305920095, "grad_norm": 0.2262744903564453, "learning_rate": 1e-06, "loss": -0.0398, "step": 1238 }, { "clip_ratio/high_max": 0.005313907044182997, "clip_ratio/high_mean": 0.0018224278501293156, "clip_ratio/low_mean": 0.0017593424308870453, "clip_ratio/low_min": 3.9062499126885086e-05, "clip_ratio/region_mean": 0.0035817704047076404, "epoch": 2.8935549722951297, "grad_norm": 0.157204732298851, "learning_rate": 1e-06, "loss": -0.0402, "step": 1239 }, { "clip_ratio/high_max": 0.006243032636120915, "clip_ratio/high_mean": 0.002214130654465407, "clip_ratio/low_mean": 0.002242102535092272, "clip_ratio/low_min": 5.859374869032763e-05, "clip_ratio/region_mean": 0.004456233189557679, "epoch": 2.8958880139982504, "grad_norm": 0.13117793202400208, "learning_rate": 1e-06, "loss": -0.0404, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 778.0469360351562, "completions/mean_terminated_length": 535.657470703125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.8982210557013706, "grad_norm": 0.36130061745643616, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 180842966.0, "reward": 0.6395089626312256, "reward_std": 0.1723596453666687, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1241 }, { "clip_ratio/high_max": 0.004025627764349338, "clip_ratio/high_mean": 0.0015155056225921726, "clip_ratio/low_mean": 0.0013571466224675532, "clip_ratio/low_min": 6.0923213823116384e-05, "clip_ratio/region_mean": 0.002872652192309033, "epoch": 2.900554097404491, "grad_norm": 0.30836403369903564, "learning_rate": 1e-06, "loss": -0.0276, "step": 1242 }, { "clip_ratio/high_max": 0.004896062229818199, "clip_ratio/high_mean": 0.0018733787728706375, "clip_ratio/low_mean": 0.0020303211167629343, "clip_ratio/low_min": 9.038248208526056e-05, "clip_ratio/region_mean": 0.0039036998205119744, "epoch": 2.9028871391076114, "grad_norm": 0.1626872420310974, "learning_rate": 1e-06, "loss": -0.0279, "step": 1243 }, { "clip_ratio/high_max": 0.006024427930242382, "clip_ratio/high_mean": 0.0022998888307483867, "clip_ratio/low_mean": 0.002674718423804734, "clip_ratio/low_min": 0.00014747287787031382, "clip_ratio/region_mean": 0.004974607290932909, "epoch": 2.905220180810732, "grad_norm": 0.13462473452091217, "learning_rate": 1e-06, "loss": -0.0283, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 763.2254638671875, "completions/mean_terminated_length": 541.04052734375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.9075532225138523, "grad_norm": 0.3002323508262634, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 181387784.0, "reward": 0.6316964626312256, "reward_std": 0.1302351951599121, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1245 }, { "clip_ratio/high_max": 0.003739705825864803, "clip_ratio/high_mean": 0.001072901282896055, "clip_ratio/low_mean": 0.0007163095388023066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001789210826245835, "epoch": 2.909886264216973, "grad_norm": 0.21162725985050201, "learning_rate": 1e-06, "loss": -0.0262, "step": 1246 }, { "clip_ratio/high_max": 0.004939486330840737, "clip_ratio/high_mean": 0.0014855509616609197, "clip_ratio/low_mean": 0.0011654639783955645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002651014903676696, "epoch": 2.912219305920093, "grad_norm": 0.13897384703159332, "learning_rate": 1e-06, "loss": -0.0265, "step": 1247 }, { "clip_ratio/high_max": 0.005790564842754975, "clip_ratio/high_mean": 0.0017127068749687169, "clip_ratio/low_mean": 0.001565818973176647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032785258954390883, "epoch": 2.914552347623214, "grad_norm": 0.11345646530389786, "learning_rate": 1e-06, "loss": -0.0266, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 1006.0982666015625, "completions/mean_terminated_length": 573.669189453125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.9168853893263345, "grad_norm": 0.3224473297595978, "learning_rate": 1e-06, "loss": -0.0738, "num_tokens": 181932344.0, "reward": 0.5290178656578064, "reward_std": 0.16112099587917328, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 1249 }, { "clip_ratio/high_max": 0.003742560882528778, "clip_ratio/high_mean": 0.0012880226568086073, "clip_ratio/low_mean": 0.0009221422806149349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002210164922871627, "epoch": 2.9192184310294547, "grad_norm": 0.19815120100975037, "learning_rate": 1e-06, "loss": -0.0739, "step": 1250 }, { "clip_ratio/high_max": 0.004872740508290008, "clip_ratio/high_mean": 0.0017193906096508726, "clip_ratio/low_mean": 0.0014026975550223142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003122088121017441, "epoch": 2.921551472732575, "grad_norm": 0.14638307690620422, "learning_rate": 1e-06, "loss": -0.0742, "step": 1251 }, { "clip_ratio/high_max": 0.006193648216139991, "clip_ratio/high_mean": 0.0021285548209561966, "clip_ratio/low_mean": 0.0019114767565042712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004040031635668129, "epoch": 2.9238845144356955, "grad_norm": 0.11612790077924728, "learning_rate": 1e-06, "loss": -0.0745, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3770.0, "completions/mean_length": 953.8460083007812, "completions/mean_terminated_length": 620.234619140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.926217556138816, "grad_norm": 0.34152257442474365, "learning_rate": 1e-06, "loss": -0.0251, "num_tokens": 182524206.0, "reward": 0.4966517984867096, "reward_std": 0.18434463441371918, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1253 }, { "clip_ratio/high_max": 0.004108877343242057, "clip_ratio/high_mean": 0.0013525647918868344, "clip_ratio/low_mean": 0.0012255901747266762, "clip_ratio/low_min": 9.473293175688013e-05, "clip_ratio/region_mean": 0.002578154941147659, "epoch": 2.9285505978419364, "grad_norm": 0.2352941334247589, "learning_rate": 1e-06, "loss": -0.0252, "step": 1254 }, { "clip_ratio/high_max": 0.005085829165182076, "clip_ratio/high_mean": 0.0017338925063086208, "clip_ratio/low_mean": 0.0018744199041975662, "clip_ratio/low_min": 0.0001929110730998218, "clip_ratio/region_mean": 0.003608312414144166, "epoch": 2.9308836395450566, "grad_norm": 0.16066819429397583, "learning_rate": 1e-06, "loss": -0.0256, "step": 1255 }, { "clip_ratio/high_max": 0.00617186444287654, "clip_ratio/high_mean": 0.002151959248294588, "clip_ratio/low_mean": 0.0025327259572804905, "clip_ratio/low_min": 0.0002136059592885431, "clip_ratio/region_mean": 0.004684685176471248, "epoch": 2.9332166812481772, "grad_norm": 0.12735240161418915, "learning_rate": 1e-06, "loss": -0.0259, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 890.544677734375, "completions/mean_terminated_length": 567.6363525390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.935549722951298, "grad_norm": 0.32491815090179443, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 183080974.0, "reward": 0.5814732313156128, "reward_std": 0.1723892092704773, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1257 }, { "clip_ratio/high_max": 0.0036223906063241884, "clip_ratio/high_mean": 0.0014371380166267045, "clip_ratio/low_mean": 0.0009037054569489555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002340843486308586, "epoch": 2.937882764654418, "grad_norm": 0.20913903415203094, "learning_rate": 1e-06, "loss": -0.0312, "step": 1258 }, { "clip_ratio/high_max": 0.004688035129220225, "clip_ratio/high_mean": 0.0017895227974804584, "clip_ratio/low_mean": 0.0015898529600235634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003379375768417958, "epoch": 2.9402158063575388, "grad_norm": 0.15108264982700348, "learning_rate": 1e-06, "loss": -0.0315, "step": 1259 }, { "clip_ratio/high_max": 0.005611968808807433, "clip_ratio/high_mean": 0.002175673234887654, "clip_ratio/low_mean": 0.002075761651212815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004251434889738448, "epoch": 2.942548848060659, "grad_norm": 0.11977625638246536, "learning_rate": 1e-06, "loss": -0.0318, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2118.0, "completions/mean_length": 812.8594360351562, "completions/mean_terminated_length": 556.0553588867188, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.9448818897637796, "grad_norm": 0.34086668491363525, "learning_rate": 1e-06, "loss": -0.0235, "num_tokens": 183638128.0, "reward": 0.5770089626312256, "reward_std": 0.16923318803310394, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1261 }, { "clip_ratio/high_max": 0.0041179699983331375, "clip_ratio/high_mean": 0.0013291670220496599, "clip_ratio/low_mean": 0.001343052885204088, "clip_ratio/low_min": 1.504573901911499e-05, "clip_ratio/region_mean": 0.0026722198963398114, "epoch": 2.9472149314669, "grad_norm": 0.24882672727108002, "learning_rate": 1e-06, "loss": -0.0237, "step": 1262 }, { "clip_ratio/high_max": 0.005144611917785369, "clip_ratio/high_mean": 0.0017332870593236294, "clip_ratio/low_mean": 0.00205996592194424, "clip_ratio/low_min": 9.56242365646176e-05, "clip_ratio/region_mean": 0.0037932530103717, "epoch": 2.9495479731700205, "grad_norm": 0.16923284530639648, "learning_rate": 1e-06, "loss": -0.0241, "step": 1263 }, { "clip_ratio/high_max": 0.006567107746377587, "clip_ratio/high_mean": 0.002158662347937934, "clip_ratio/low_mean": 0.0027636711311060935, "clip_ratio/low_min": 0.00011474908387754112, "clip_ratio/region_mean": 0.004922333420836367, "epoch": 2.9518810148731407, "grad_norm": 0.4389563202857971, "learning_rate": 1e-06, "loss": -0.0243, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3179.0, "completions/mean_length": 877.4654541015625, "completions/mean_terminated_length": 617.3425903320312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.9542140565762613, "grad_norm": 0.31129270792007446, "learning_rate": 1e-06, "loss": -0.0297, "num_tokens": 184237217.0, "reward": 0.578125, "reward_std": 0.1637117862701416, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1265 }, { "clip_ratio/high_max": 0.002561697387136519, "clip_ratio/high_mean": 0.0009978618727473076, "clip_ratio/low_mean": 0.001073142322638887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002071004200843163, "epoch": 2.9565470982793816, "grad_norm": 0.20311911404132843, "learning_rate": 1e-06, "loss": -0.0298, "step": 1266 }, { "clip_ratio/high_max": 0.003802918115979992, "clip_ratio/high_mean": 0.001375231866404647, "clip_ratio/low_mean": 0.0014018906895216787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027771225868491456, "epoch": 2.958880139982502, "grad_norm": 0.14562973380088806, "learning_rate": 1e-06, "loss": -0.0301, "step": 1267 }, { "clip_ratio/high_max": 0.004407259446452372, "clip_ratio/high_mean": 0.0017240505549125373, "clip_ratio/low_mean": 0.0019773931308009196, "clip_ratio/low_min": 6.066783316782676e-05, "clip_ratio/region_mean": 0.003701443725731224, "epoch": 2.961213181685623, "grad_norm": 0.12381011992692947, "learning_rate": 1e-06, "loss": -0.0303, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2149.0, "completions/mean_length": 842.3035888671875, "completions/mean_terminated_length": 532.0489501953125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.963546223388743, "grad_norm": 0.3510003983974457, "learning_rate": 1e-06, "loss": -0.0514, "num_tokens": 184761585.0, "reward": 0.6026785969734192, "reward_std": 0.17844374477863312, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1269 }, { "clip_ratio/high_max": 0.0037762470601592213, "clip_ratio/high_mean": 0.0014730968250660226, "clip_ratio/low_mean": 0.0012278395788598573, "clip_ratio/low_min": 3.704801565618254e-05, "clip_ratio/region_mean": 0.00270093646395253, "epoch": 2.9658792650918633, "grad_norm": 0.25255659222602844, "learning_rate": 1e-06, "loss": -0.0515, "step": 1270 }, { "clip_ratio/high_max": 0.004274423510651104, "clip_ratio/high_mean": 0.0017491269572929014, "clip_ratio/low_mean": 0.001928929104906274, "clip_ratio/low_min": 6.174669397296384e-05, "clip_ratio/region_mean": 0.003678056033095345, "epoch": 2.968212306794984, "grad_norm": 0.15946036577224731, "learning_rate": 1e-06, "loss": -0.0519, "step": 1271 }, { "clip_ratio/high_max": 0.005358855138183571, "clip_ratio/high_mean": 0.0021388957829913124, "clip_ratio/low_mean": 0.002551702858909266, "clip_ratio/low_min": 9.630200656829402e-05, "clip_ratio/region_mean": 0.004690598536399193, "epoch": 2.9705453484981046, "grad_norm": 0.1383240669965744, "learning_rate": 1e-06, "loss": -0.0521, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3775.0, "completions/mean_length": 908.3582763671875, "completions/mean_terminated_length": 569.9172973632812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.972878390201225, "grad_norm": 0.32730382680892944, "learning_rate": 1e-06, "loss": -0.0154, "num_tokens": 185313002.0, "reward": 0.5770089626312256, "reward_std": 0.1692010909318924, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1273 }, { "clip_ratio/high_max": 0.003224958825740032, "clip_ratio/high_mean": 0.0013082327295705909, "clip_ratio/low_mean": 0.0010734459065133706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002381678714300506, "epoch": 2.9752114319043454, "grad_norm": 0.21008794009685516, "learning_rate": 1e-06, "loss": -0.0156, "step": 1274 }, { "clip_ratio/high_max": 0.004328435752540827, "clip_ratio/high_mean": 0.0017579489285708405, "clip_ratio/low_mean": 0.0016764302163210232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034343791339779273, "epoch": 2.9775444736074657, "grad_norm": 0.15935666859149933, "learning_rate": 1e-06, "loss": -0.0159, "step": 1275 }, { "clip_ratio/high_max": 0.005523221436305903, "clip_ratio/high_mean": 0.002152206168830162, "clip_ratio/low_mean": 0.0021203861069807317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004272592326742597, "epoch": 2.9798775153105863, "grad_norm": 0.11957625299692154, "learning_rate": 1e-06, "loss": -0.0162, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3685.0, "completions/mean_length": 803.0123291015625, "completions/mean_terminated_length": 583.4797973632812, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 2.9822105570137065, "grad_norm": 0.3326095640659332, "learning_rate": 1e-06, "loss": -0.0149, "num_tokens": 185899829.0, "reward": 0.6183035969734192, "reward_std": 0.18058060109615326, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1277 }, { "clip_ratio/high_max": 0.0035458617567201145, "clip_ratio/high_mean": 0.0014172384326229803, "clip_ratio/low_mean": 0.0012984473287360743, "clip_ratio/low_min": 3.2946758437901735e-05, "clip_ratio/region_mean": 0.002715685768635012, "epoch": 2.984543598716827, "grad_norm": 0.23744042217731476, "learning_rate": 1e-06, "loss": -0.015, "step": 1278 }, { "clip_ratio/high_max": 0.004517386099905707, "clip_ratio/high_mean": 0.001789726404240355, "clip_ratio/low_mean": 0.002089250869175885, "clip_ratio/low_min": 6.589351687580347e-05, "clip_ratio/region_mean": 0.003878977193380706, "epoch": 2.9868766404199474, "grad_norm": 0.19358785450458527, "learning_rate": 1e-06, "loss": -0.0154, "step": 1279 }, { "clip_ratio/high_max": 0.005412775179138407, "clip_ratio/high_mean": 0.0021559614833677188, "clip_ratio/low_mean": 0.0026569639921945054, "clip_ratio/low_min": 4.94201376568526e-05, "clip_ratio/region_mean": 0.004812925442820415, "epoch": 2.989209682123068, "grad_norm": 0.14558716118335724, "learning_rate": 1e-06, "loss": -0.0157, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 843.9754638671875, "completions/mean_terminated_length": 551.214111328125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 2.9915427238261882, "grad_norm": 0.3646296560764313, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 186457023.0, "reward": 0.59375, "reward_std": 0.16172830760478973, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1281 }, { "clip_ratio/high_max": 0.003118972876109183, "clip_ratio/high_mean": 0.0011167291449964978, "clip_ratio/low_mean": 0.0011998026529909112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023165318052633666, "epoch": 2.993875765529309, "grad_norm": 0.2160804569721222, "learning_rate": 1e-06, "loss": -0.0123, "step": 1282 }, { "clip_ratio/high_max": 0.004510902872425504, "clip_ratio/high_mean": 0.0015314042611862533, "clip_ratio/low_mean": 0.001807251013815403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033386552677256986, "epoch": 2.9962088072324295, "grad_norm": 0.147269606590271, "learning_rate": 1e-06, "loss": -0.0127, "step": 1283 }, { "clip_ratio/high_max": 0.005252306655165739, "clip_ratio/high_mean": 0.0018510089939809404, "clip_ratio/low_mean": 0.002443813908030279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004294822880183347, "epoch": 2.9985418489355498, "grad_norm": 0.11825108528137207, "learning_rate": 1e-06, "loss": -0.0129, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 993.56591796875, "completions/mean_terminated_length": 642.8558959960938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.0023330417031207, "grad_norm": 0.3235376179218292, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 187064810.0, "reward": 0.4888392984867096, "reward_std": 0.17423506081104279, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1285 }, { "clip_ratio/high_max": 0.003347896141349338, "clip_ratio/high_mean": 0.0013063332662568428, "clip_ratio/low_mean": 0.0010055549500975758, "clip_ratio/low_min": 1.33832973006065e-05, "clip_ratio/region_mean": 0.0023118882163544185, "epoch": 3.004666083406241, "grad_norm": 0.21671877801418304, "learning_rate": 1e-06, "loss": -0.0331, "step": 1286 }, { "clip_ratio/high_max": 0.004363463696790859, "clip_ratio/high_mean": 0.0017142184187832754, "clip_ratio/low_mean": 0.001571965922266827, "clip_ratio/low_min": 2.6766594601213e-05, "clip_ratio/region_mean": 0.0032861843501450494, "epoch": 3.0069991251093615, "grad_norm": 0.14550215005874634, "learning_rate": 1e-06, "loss": -0.0335, "step": 1287 }, { "clip_ratio/high_max": 0.004968255481799133, "clip_ratio/high_mean": 0.0020306847072788514, "clip_ratio/low_mean": 0.0021761699754279107, "clip_ratio/low_min": 6.69164874125272e-05, "clip_ratio/region_mean": 0.004206854660878889, "epoch": 3.0093321668124817, "grad_norm": 0.11264042556285858, "learning_rate": 1e-06, "loss": -0.0338, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 885.69091796875, "completions/mean_terminated_length": 609.4097290039062, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 3.0116652085156024, "grad_norm": 0.27951478958129883, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 187650069.0, "reward": 0.598214328289032, "reward_std": 0.17325684428215027, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1289 }, { "clip_ratio/high_max": 0.0028348319610813633, "clip_ratio/high_mean": 0.0011417440728109796, "clip_ratio/low_mean": 0.0009080118143174332, "clip_ratio/low_min": 3.001123604917666e-05, "clip_ratio/region_mean": 0.0020497558871284127, "epoch": 3.0139982502187226, "grad_norm": 0.18120995163917542, "learning_rate": 1e-06, "loss": -0.044, "step": 1290 }, { "clip_ratio/high_max": 0.003840992707409896, "clip_ratio/high_mean": 0.0014871199127810542, "clip_ratio/low_mean": 0.00142103423786466, "clip_ratio/low_min": 8.654556222609244e-05, "clip_ratio/region_mean": 0.0029081541433697566, "epoch": 3.0163312919218432, "grad_norm": 0.1384991705417633, "learning_rate": 1e-06, "loss": -0.0444, "step": 1291 }, { "clip_ratio/high_max": 0.0043859604629687965, "clip_ratio/high_mean": 0.0017995306552620605, "clip_ratio/low_mean": 0.0019162701501045376, "clip_ratio/low_min": 0.0001328603466390632, "clip_ratio/region_mean": 0.0037158007835387252, "epoch": 3.0186643336249634, "grad_norm": 0.10875407606363297, "learning_rate": 1e-06, "loss": -0.0445, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 784.6116333007812, "completions/mean_terminated_length": 568.0523071289062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 3.020997375328084, "grad_norm": 0.2986029386520386, "learning_rate": 1e-06, "loss": -0.024, "num_tokens": 188217465.0, "reward": 0.5535714626312256, "reward_std": 0.1493610143661499, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 1293 }, { "clip_ratio/high_max": 0.002566226619819645, "clip_ratio/high_mean": 0.0009442674127058126, "clip_ratio/low_mean": 0.0009327176467195386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001876985053968383, "epoch": 3.0233304170312043, "grad_norm": 0.17231304943561554, "learning_rate": 1e-06, "loss": -0.0241, "step": 1294 }, { "clip_ratio/high_max": 0.003394087732885964, "clip_ratio/high_mean": 0.001184509186714422, "clip_ratio/low_mean": 0.0013050622910668608, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024895715250750072, "epoch": 3.025663458734325, "grad_norm": 0.13227148354053497, "learning_rate": 1e-06, "loss": -0.0244, "step": 1295 }, { "clip_ratio/high_max": 0.003962675960792694, "clip_ratio/high_mean": 0.0014485803294519428, "clip_ratio/low_mean": 0.0018191339768236503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032677143317414448, "epoch": 3.027996500437445, "grad_norm": 0.10773932933807373, "learning_rate": 1e-06, "loss": -0.0246, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 807.896240234375, "completions/mean_terminated_length": 563.4568481445312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.030329542140566, "grad_norm": 0.2933730185031891, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 188777980.0, "reward": 0.5970982313156128, "reward_std": 0.1582634449005127, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 1297 }, { "clip_ratio/high_max": 0.0028347538609523326, "clip_ratio/high_mean": 0.0011242183591093635, "clip_ratio/low_mean": 0.0008396432203880977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001963861592230387, "epoch": 3.032662583843686, "grad_norm": 0.20799927413463593, "learning_rate": 1e-06, "loss": -0.0255, "step": 1298 }, { "clip_ratio/high_max": 0.003585598540666979, "clip_ratio/high_mean": 0.0014342015874717617, "clip_ratio/low_mean": 0.0013248831510281889, "clip_ratio/low_min": 1.2462612176022958e-05, "clip_ratio/region_mean": 0.0027590847166720778, "epoch": 3.0349956255468067, "grad_norm": 0.1440235823392868, "learning_rate": 1e-06, "loss": -0.0259, "step": 1299 }, { "clip_ratio/high_max": 0.004522770534094889, "clip_ratio/high_mean": 0.0017792403159546666, "clip_ratio/low_mean": 0.0018176820194639731, "clip_ratio/low_min": 4.985044870409183e-05, "clip_ratio/region_mean": 0.003596922368160449, "epoch": 3.037328667249927, "grad_norm": 0.10914633423089981, "learning_rate": 1e-06, "loss": -0.0261, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3658.0, "completions/mean_length": 764.5145263671875, "completions/mean_terminated_length": 546.640869140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.0396617089530475, "grad_norm": 0.31581422686576843, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 189317969.0, "reward": 0.6618303656578064, "reward_std": 0.14060832560062408, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1301 }, { "clip_ratio/high_max": 0.0030436955494224094, "clip_ratio/high_mean": 0.001129852968006162, "clip_ratio/low_mean": 0.0009551413340886938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002084994313918287, "epoch": 3.041994750656168, "grad_norm": 0.19620144367218018, "learning_rate": 1e-06, "loss": 0.0027, "step": 1302 }, { "clip_ratio/high_max": 0.00373860755644273, "clip_ratio/high_mean": 0.0015050145466375398, "clip_ratio/low_mean": 0.0014064186161704129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029114332210156135, "epoch": 3.0443277923592884, "grad_norm": 0.13803702592849731, "learning_rate": 1e-06, "loss": 0.0024, "step": 1303 }, { "clip_ratio/high_max": 0.0044464999955380335, "clip_ratio/high_mean": 0.0018024718738161027, "clip_ratio/low_mean": 0.001904217858282209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003706689807586372, "epoch": 3.046660834062409, "grad_norm": 0.10832430422306061, "learning_rate": 1e-06, "loss": 0.0022, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3877.0, "completions/mean_length": 873.6127319335938, "completions/mean_terminated_length": 613.1785278320312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 3.0489938757655293, "grad_norm": 0.30333733558654785, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 189918446.0, "reward": 0.5870535969734192, "reward_std": 0.14985068142414093, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 1305 }, { "clip_ratio/high_max": 0.0025459287789999507, "clip_ratio/high_mean": 0.0011344791728333803, "clip_ratio/low_mean": 0.0009002292972581927, "clip_ratio/low_min": 2.2999080101726577e-05, "clip_ratio/region_mean": 0.002034708439168753, "epoch": 3.05132691746865, "grad_norm": 0.1892513483762741, "learning_rate": 1e-06, "loss": -0.0344, "step": 1306 }, { "clip_ratio/high_max": 0.0035360960391699336, "clip_ratio/high_mean": 0.0015167588117037667, "clip_ratio/low_mean": 0.0012873805171693675, "clip_ratio/low_min": 1.6765021428000182e-05, "clip_ratio/region_mean": 0.002804139323416166, "epoch": 3.05365995917177, "grad_norm": 0.14323550462722778, "learning_rate": 1e-06, "loss": -0.0346, "step": 1307 }, { "clip_ratio/high_max": 0.0043331809574738145, "clip_ratio/high_mean": 0.0017947711639862973, "clip_ratio/low_mean": 0.0017718231028993614, "clip_ratio/low_min": 4.599816020345315e-05, "clip_ratio/region_mean": 0.0035665942195919342, "epoch": 3.055993000874891, "grad_norm": 0.1408172845840454, "learning_rate": 1e-06, "loss": -0.0348, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 981.5982666015625, "completions/mean_terminated_length": 554.7512817382812, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.058326042578011, "grad_norm": 0.3233025372028351, "learning_rate": 1e-06, "loss": -0.0512, "num_tokens": 190448342.0, "reward": 0.5770089626312256, "reward_std": 0.13079974055290222, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1309 }, { "clip_ratio/high_max": 0.00351512309134705, "clip_ratio/high_mean": 0.0011678094633680303, "clip_ratio/low_mean": 0.0007460702490789117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019138796778861433, "epoch": 3.0606590842811316, "grad_norm": 0.19027425348758698, "learning_rate": 1e-06, "loss": -0.0513, "step": 1310 }, { "clip_ratio/high_max": 0.004973391653038561, "clip_ratio/high_mean": 0.0016260582451650407, "clip_ratio/low_mean": 0.001142242696005269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027683009684551507, "epoch": 3.062992125984252, "grad_norm": 0.13113045692443848, "learning_rate": 1e-06, "loss": -0.0516, "step": 1311 }, { "clip_ratio/high_max": 0.005662573283188976, "clip_ratio/high_mean": 0.0018825908700819127, "clip_ratio/low_mean": 0.0015145689721975941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033971597949857824, "epoch": 3.0653251676873725, "grad_norm": 0.11086880415678024, "learning_rate": 1e-06, "loss": -0.0518, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 814.7756958007812, "completions/mean_terminated_length": 536.705810546875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.0676582093904927, "grad_norm": 0.308420866727829, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 190982469.0, "reward": 0.637276828289032, "reward_std": 0.14777731895446777, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1313 }, { "clip_ratio/high_max": 0.003531349793775007, "clip_ratio/high_mean": 0.0012903727474622428, "clip_ratio/low_mean": 0.0009310632267442998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022214359778445214, "epoch": 3.0699912510936134, "grad_norm": 0.29667773842811584, "learning_rate": 1e-06, "loss": -0.0299, "step": 1314 }, { "clip_ratio/high_max": 0.003660500718979165, "clip_ratio/high_mean": 0.0015936737472657114, "clip_ratio/low_mean": 0.0014559493947672308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003049623112019617, "epoch": 3.0723242927967336, "grad_norm": 0.16481493413448334, "learning_rate": 1e-06, "loss": -0.0302, "step": 1315 }, { "clip_ratio/high_max": 0.00463547176332213, "clip_ratio/high_mean": 0.0019562412780942395, "clip_ratio/low_mean": 0.0019980457982455846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0039542870508739725, "epoch": 3.0746573344998542, "grad_norm": 0.149788498878479, "learning_rate": 1e-06, "loss": -0.0304, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 853.8795166015625, "completions/mean_terminated_length": 596.0723266601562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.0769903762029744, "grad_norm": 0.3489627242088318, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 191572945.0, "reward": 0.5323660969734192, "reward_std": 0.17953890562057495, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1317 }, { "clip_ratio/high_max": 0.0034274694189662114, "clip_ratio/high_mean": 0.001396500701957848, "clip_ratio/low_mean": 0.0010530337131058332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024495344114257023, "epoch": 3.079323417906095, "grad_norm": 0.3230558931827545, "learning_rate": 1e-06, "loss": -0.0277, "step": 1318 }, { "clip_ratio/high_max": 0.004044176661409438, "clip_ratio/high_mean": 0.0016798571195977274, "clip_ratio/low_mean": 0.0017272685872740112, "clip_ratio/low_min": 3.356605884619057e-05, "clip_ratio/region_mean": 0.0034071255940943956, "epoch": 3.0816564596092153, "grad_norm": 0.19693563878536224, "learning_rate": 1e-06, "loss": -0.0281, "step": 1319 }, { "clip_ratio/high_max": 0.0049320129546686076, "clip_ratio/high_mean": 0.0020905789897369687, "clip_ratio/low_mean": 0.002328898357518483, "clip_ratio/low_min": 3.356605884619057e-05, "clip_ratio/region_mean": 0.004419477379997261, "epoch": 3.083989501312336, "grad_norm": 0.16925373673439026, "learning_rate": 1e-06, "loss": -0.0284, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 775.443115234375, "completions/mean_terminated_length": 545.6193237304688, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.0863225430154566, "grad_norm": 0.4020577371120453, "learning_rate": 1e-06, "loss": -0.0312, "num_tokens": 192116950.0, "reward": 0.6707589626312256, "reward_std": 0.1665322184562683, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1321 }, { "clip_ratio/high_max": 0.006342264219711069, "clip_ratio/high_mean": 0.0019260492190369405, "clip_ratio/low_mean": 0.001029795080285112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002955844385724049, "epoch": 3.088655584718577, "grad_norm": 0.2245895117521286, "learning_rate": 1e-06, "loss": -0.0314, "step": 1322 }, { "clip_ratio/high_max": 0.008194772861315869, "clip_ratio/high_mean": 0.002484952776285354, "clip_ratio/low_mean": 0.0015532885627180804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004038241284433752, "epoch": 3.0909886264216975, "grad_norm": 0.1628023236989975, "learning_rate": 1e-06, "loss": -0.0318, "step": 1323 }, { "clip_ratio/high_max": 0.009211260752636008, "clip_ratio/high_mean": 0.0028014933450322133, "clip_ratio/low_mean": 0.002071933715342311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004873427053098567, "epoch": 3.0933216681248177, "grad_norm": 0.12150697410106659, "learning_rate": 1e-06, "loss": -0.032, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3472.0, "completions/mean_length": 764.9074096679688, "completions/mean_terminated_length": 555.479248046875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.0956547098279383, "grad_norm": 0.28108730912208557, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 192672435.0, "reward": 0.6618303656578064, "reward_std": 0.12058320641517639, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1325 }, { "clip_ratio/high_max": 0.0028635841081268154, "clip_ratio/high_mean": 0.0009906221348501276, "clip_ratio/low_mean": 0.0006775758361072803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016681979614077136, "epoch": 3.0979877515310585, "grad_norm": 0.16088968515396118, "learning_rate": 1e-06, "loss": -0.0224, "step": 1326 }, { "clip_ratio/high_max": 0.003477334976196289, "clip_ratio/high_mean": 0.0011994206324743573, "clip_ratio/low_mean": 0.0010375597375968937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022369803409674205, "epoch": 3.100320793234179, "grad_norm": 0.12072911113500595, "learning_rate": 1e-06, "loss": -0.0226, "step": 1327 }, { "clip_ratio/high_max": 0.00389429391361773, "clip_ratio/high_mean": 0.0013570113624155056, "clip_ratio/low_mean": 0.001396839050357812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027538503927644342, "epoch": 3.1026538349372994, "grad_norm": 0.09798149019479752, "learning_rate": 1e-06, "loss": -0.0228, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3601.0, "completions/mean_length": 717.263427734375, "completions/mean_terminated_length": 526.01416015625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.10498687664042, "grad_norm": 0.2809072434902191, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 193213823.0, "reward": 0.6517857313156128, "reward_std": 0.12403164803981781, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1329 }, { "clip_ratio/high_max": 0.002934366955742007, "clip_ratio/high_mean": 0.0009227501068380661, "clip_ratio/low_mean": 0.0008383680351471412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017611181592656067, "epoch": 3.1073199183435403, "grad_norm": 0.23173615336418152, "learning_rate": 1e-06, "loss": -0.0169, "step": 1330 }, { "clip_ratio/high_max": 0.003883779121679254, "clip_ratio/high_mean": 0.001245880892383866, "clip_ratio/low_mean": 0.0012814350438929978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025273159226344433, "epoch": 3.109652960046661, "grad_norm": 0.13678884506225586, "learning_rate": 1e-06, "loss": -0.0172, "step": 1331 }, { "clip_ratio/high_max": 0.0043861636222573, "clip_ratio/high_mean": 0.0014671811186417472, "clip_ratio/low_mean": 0.0016659919679113955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031331730606325436, "epoch": 3.111986001749781, "grad_norm": 0.10945151001214981, "learning_rate": 1e-06, "loss": -0.0173, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 769.5748291015625, "completions/mean_terminated_length": 605.9800415039062, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.114319043452902, "grad_norm": 0.3080281913280487, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 193832162.0, "reward": 0.5915178656578064, "reward_std": 0.16427844762802124, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1333 }, { "clip_ratio/high_max": 0.003496193719911389, "clip_ratio/high_mean": 0.001364412304610596, "clip_ratio/low_mean": 0.0009881848554869066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023525971482740715, "epoch": 3.116652085156022, "grad_norm": 0.24993965029716492, "learning_rate": 1e-06, "loss": -0.0177, "step": 1334 }, { "clip_ratio/high_max": 0.004484617224079557, "clip_ratio/high_mean": 0.0017437187561881728, "clip_ratio/low_mean": 0.00147363840142134, "clip_ratio/low_min": 1.5715362678747624e-05, "clip_ratio/region_mean": 0.003217357152607292, "epoch": 3.1189851268591426, "grad_norm": 0.533083975315094, "learning_rate": 1e-06, "loss": -0.018, "step": 1335 }, { "clip_ratio/high_max": 0.005386135206208564, "clip_ratio/high_mean": 0.0020800697348022368, "clip_ratio/low_mean": 0.0019994208987554885, "clip_ratio/low_min": 3.143072535749525e-05, "clip_ratio/region_mean": 0.00407949069631286, "epoch": 3.121318168562263, "grad_norm": 0.2689291536808014, "learning_rate": 1e-06, "loss": -0.0183, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 725.2299194335938, "completions/mean_terminated_length": 526.0118408203125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.1236512102653835, "grad_norm": 0.3273763060569763, "learning_rate": 1e-06, "loss": -0.0372, "num_tokens": 194358712.0, "reward": 0.6741071939468384, "reward_std": 0.15849420428276062, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1337 }, { "clip_ratio/high_max": 0.0033786642761697294, "clip_ratio/high_mean": 0.0012114479818592372, "clip_ratio/low_mean": 0.0011333897564327344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002344837732380256, "epoch": 3.1259842519685037, "grad_norm": 0.22608540952205658, "learning_rate": 1e-06, "loss": -0.0372, "step": 1338 }, { "clip_ratio/high_max": 0.004101422637177166, "clip_ratio/high_mean": 0.001552664971768536, "clip_ratio/low_mean": 0.0017679704978945665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003320635463751387, "epoch": 3.1283172936716244, "grad_norm": 0.1571611911058426, "learning_rate": 1e-06, "loss": -0.0376, "step": 1339 }, { "clip_ratio/high_max": 0.004942597912304336, "clip_ratio/high_mean": 0.001896972801660013, "clip_ratio/low_mean": 0.0022597504903387744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004156723283813335, "epoch": 3.130650335374745, "grad_norm": 0.1424572765827179, "learning_rate": 1e-06, "loss": -0.0378, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 815.9944458007812, "completions/mean_terminated_length": 588.9773559570312, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 3.1329833770778652, "grad_norm": 0.3142790198326111, "learning_rate": 1e-06, "loss": -0.0239, "num_tokens": 194942611.0, "reward": 0.5870535969734192, "reward_std": 0.1738220751285553, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1341 }, { "clip_ratio/high_max": 0.0027846384364238475, "clip_ratio/high_mean": 0.0011054652168240864, "clip_ratio/low_mean": 0.0011949132313020527, "clip_ratio/low_min": 7.493981502193492e-05, "clip_ratio/region_mean": 0.002300378466316033, "epoch": 3.135316418780986, "grad_norm": 0.22868481278419495, "learning_rate": 1e-06, "loss": -0.024, "step": 1342 }, { "clip_ratio/high_max": 0.0035944798510172404, "clip_ratio/high_mean": 0.001510964870249154, "clip_ratio/low_mean": 0.0017316992598352954, "clip_ratio/low_min": 6.598970685445238e-05, "clip_ratio/region_mean": 0.00324266409734264, "epoch": 3.137649460484106, "grad_norm": 0.1427541822195053, "learning_rate": 1e-06, "loss": -0.0243, "step": 1343 }, { "clip_ratio/high_max": 0.004253444807545748, "clip_ratio/high_mean": 0.0017797188957047183, "clip_ratio/low_mean": 0.0023530904945801012, "clip_ratio/low_min": 0.00013901395868742839, "clip_ratio/region_mean": 0.004132809321163222, "epoch": 3.1399825021872267, "grad_norm": 0.11625955253839493, "learning_rate": 1e-06, "loss": -0.0245, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3479.0, "completions/mean_length": 724.5870971679688, "completions/mean_terminated_length": 546.3102416992188, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.142315543890347, "grad_norm": 0.34080374240875244, "learning_rate": 1e-06, "loss": -0.0209, "num_tokens": 195494825.0, "reward": 0.6852678656578064, "reward_std": 0.15282267332077026, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.4646684527397156, "step": 1345 }, { "clip_ratio/high_max": 0.003194547156454064, "clip_ratio/high_mean": 0.001189547674584901, "clip_ratio/low_mean": 0.0010994807926181238, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022890284744789824, "epoch": 3.1446485855934676, "grad_norm": 0.7207558751106262, "learning_rate": 1e-06, "loss": -0.0209, "step": 1346 }, { "clip_ratio/high_max": 0.0036659339821198955, "clip_ratio/high_mean": 0.001495066397183109, "clip_ratio/low_mean": 0.0015924110848573036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030874775475240313, "epoch": 3.146981627296588, "grad_norm": 0.22017349302768707, "learning_rate": 1e-06, "loss": -0.0212, "step": 1347 }, { "clip_ratio/high_max": 0.00442534415924456, "clip_ratio/high_mean": 0.0017140406707767397, "clip_ratio/low_mean": 0.0020087938828510232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037228345463518053, "epoch": 3.1493146689997085, "grad_norm": 0.15542852878570557, "learning_rate": 1e-06, "loss": -0.0213, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 1121.6741943359375, "completions/mean_terminated_length": 643.9326171875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 3.1516477107028287, "grad_norm": 0.3724318742752075, "learning_rate": 1e-06, "loss": -0.0675, "num_tokens": 196106525.0, "reward": 0.4665178656578064, "reward_std": 0.18768569827079773, "rewards/verify_math_reward/mean": 0.4665178656578064, "rewards/verify_math_reward/std": 0.49915629625320435, "step": 1349 }, { "clip_ratio/high_max": 0.0037856314520468004, "clip_ratio/high_mean": 0.0016181451064767316, "clip_ratio/low_mean": 0.0012675985090027098, "clip_ratio/low_min": 4.2312763071095105e-05, "clip_ratio/region_mean": 0.0028857436263933778, "epoch": 3.1539807524059493, "grad_norm": 0.24437792599201202, "learning_rate": 1e-06, "loss": -0.0676, "step": 1350 }, { "clip_ratio/high_max": 0.004996480347472243, "clip_ratio/high_mean": 0.0020778304533450864, "clip_ratio/low_mean": 0.001729010426060995, "clip_ratio/low_min": 9.641976066632196e-05, "clip_ratio/region_mean": 0.0038068409194238484, "epoch": 3.1563137941090695, "grad_norm": 0.250063419342041, "learning_rate": 1e-06, "loss": -0.068, "step": 1351 }, { "clip_ratio/high_max": 0.0056334636465180665, "clip_ratio/high_mean": 0.002439655439957278, "clip_ratio/low_mean": 0.002318218135769712, "clip_ratio/low_min": 9.100221723201685e-05, "clip_ratio/region_mean": 0.004757873612106778, "epoch": 3.15864683581219, "grad_norm": 0.14646685123443604, "learning_rate": 1e-06, "loss": -0.0683, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 766.5078735351562, "completions/mean_terminated_length": 594.5621948242188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.1609798775153104, "grad_norm": 0.28004342317581177, "learning_rate": 1e-06, "loss": -0.0297, "num_tokens": 196705620.0, "reward": 0.6238839626312256, "reward_std": 0.14019393920898438, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 1353 }, { "clip_ratio/high_max": 0.002768811049463693, "clip_ratio/high_mean": 0.001085884272470139, "clip_ratio/low_mean": 0.0009146504398813704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020005346959806047, "epoch": 3.163312919218431, "grad_norm": 0.19815340638160706, "learning_rate": 1e-06, "loss": -0.0298, "step": 1354 }, { "clip_ratio/high_max": 0.0035544572747312486, "clip_ratio/high_mean": 0.0014132764172245516, "clip_ratio/low_mean": 0.0013671878223249223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027804641795228235, "epoch": 3.1656459609215517, "grad_norm": 0.1399182379245758, "learning_rate": 1e-06, "loss": -0.0301, "step": 1355 }, { "clip_ratio/high_max": 0.004157399001996964, "clip_ratio/high_mean": 0.0016327423327311408, "clip_ratio/low_mean": 0.0018685515642573591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035012938460567966, "epoch": 3.167979002624672, "grad_norm": 0.10548589378595352, "learning_rate": 1e-06, "loss": -0.0303, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 844.4029541015625, "completions/mean_terminated_length": 568.8438720703125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.1703120443277926, "grad_norm": 0.3227315843105316, "learning_rate": 1e-06, "loss": -0.0127, "num_tokens": 197260381.0, "reward": 0.6495535969734192, "reward_std": 0.15409007668495178, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1357 }, { "clip_ratio/high_max": 0.002895631463616155, "clip_ratio/high_mean": 0.001038059604979935, "clip_ratio/low_mean": 0.001065221214958001, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021032808435847983, "epoch": 3.1726450860309128, "grad_norm": 0.20992279052734375, "learning_rate": 1e-06, "loss": -0.0128, "step": 1358 }, { "clip_ratio/high_max": 0.003969629338826053, "clip_ratio/high_mean": 0.0014088470379647333, "clip_ratio/low_mean": 0.001592251366673736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030010983828105964, "epoch": 3.1749781277340334, "grad_norm": 0.1447216123342514, "learning_rate": 1e-06, "loss": -0.0131, "step": 1359 }, { "clip_ratio/high_max": 0.004560449917335063, "clip_ratio/high_mean": 0.0016285045421682298, "clip_ratio/low_mean": 0.0020669290315709077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003695433581015095, "epoch": 3.1773111694371536, "grad_norm": 0.12238189578056335, "learning_rate": 1e-06, "loss": -0.0133, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 933.1897583007812, "completions/mean_terminated_length": 593.060546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.1796442111402743, "grad_norm": 0.3557065725326538, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 197822839.0, "reward": 0.590401828289032, "reward_std": 0.18919271230697632, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1361 }, { "clip_ratio/high_max": 0.005481453757965937, "clip_ratio/high_mean": 0.0017864424735307693, "clip_ratio/low_mean": 0.0011057568663090933, "clip_ratio/low_min": 4.295532562537119e-05, "clip_ratio/region_mean": 0.0028921993289259262, "epoch": 3.1819772528433945, "grad_norm": 0.20615904033184052, "learning_rate": 1e-06, "loss": -0.0617, "step": 1362 }, { "clip_ratio/high_max": 0.007055405672872439, "clip_ratio/high_mean": 0.0022905863079358824, "clip_ratio/low_mean": 0.0016182903636945412, "clip_ratio/low_min": 3.669994111987762e-05, "clip_ratio/region_mean": 0.003908876664354466, "epoch": 3.184310294546515, "grad_norm": 0.16670063138008118, "learning_rate": 1e-06, "loss": -0.0621, "step": 1363 }, { "clip_ratio/high_max": 0.007973649146151729, "clip_ratio/high_mean": 0.0026886569830821827, "clip_ratio/low_mean": 0.002161819335015025, "clip_ratio/low_min": 9.0492278104648e-05, "clip_ratio/region_mean": 0.00485047628171742, "epoch": 3.1866433362496354, "grad_norm": 0.1282145380973816, "learning_rate": 1e-06, "loss": -0.0623, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3433.0, "completions/mean_length": 901.7109985351562, "completions/mean_terminated_length": 651.8568115234375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.188976377952756, "grad_norm": 0.3124520480632782, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 198458004.0, "reward": 0.5613839626312256, "reward_std": 0.19448700547218323, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1365 }, { "clip_ratio/high_max": 0.0032276794299832545, "clip_ratio/high_mean": 0.0013194877392379567, "clip_ratio/low_mean": 0.0011541293115442386, "clip_ratio/low_min": 2.8415548513294198e-05, "clip_ratio/region_mean": 0.0024736170671531, "epoch": 3.1913094196558762, "grad_norm": 0.2646138072013855, "learning_rate": 1e-06, "loss": -0.0172, "step": 1366 }, { "clip_ratio/high_max": 0.004418410382641014, "clip_ratio/high_mean": 0.00169579167777556, "clip_ratio/low_mean": 0.001586827049322892, "clip_ratio/low_min": 4.2623323679436e-05, "clip_ratio/region_mean": 0.003282618723460473, "epoch": 3.193642461358997, "grad_norm": 0.15246938169002533, "learning_rate": 1e-06, "loss": -0.0176, "step": 1367 }, { "clip_ratio/high_max": 0.0051454535278026015, "clip_ratio/high_mean": 0.0019998202842543833, "clip_ratio/low_mean": 0.002086849730403628, "clip_ratio/low_min": 5.6831097026588395e-05, "clip_ratio/region_mean": 0.004086670043761842, "epoch": 3.195975503062117, "grad_norm": 0.11949945986270905, "learning_rate": 1e-06, "loss": -0.0179, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 922.989990234375, "completions/mean_terminated_length": 559.9091796875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 3.1983085447652377, "grad_norm": 0.2877159118652344, "learning_rate": 1e-06, "loss": -0.0457, "num_tokens": 198994275.0, "reward": 0.6026785969734192, "reward_std": 0.1479288637638092, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1369 }, { "clip_ratio/high_max": 0.0030664305777463596, "clip_ratio/high_mean": 0.0010278064382873708, "clip_ratio/low_mean": 0.0008596889019827358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018874953566410113, "epoch": 3.200641586468358, "grad_norm": 0.3405218720436096, "learning_rate": 1e-06, "loss": -0.0457, "step": 1370 }, { "clip_ratio/high_max": 0.0040393521267105825, "clip_ratio/high_mean": 0.0013687296086573042, "clip_ratio/low_mean": 0.0014340775560413022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028028071828885004, "epoch": 3.2029746281714786, "grad_norm": 0.279525488615036, "learning_rate": 1e-06, "loss": -0.0461, "step": 1371 }, { "clip_ratio/high_max": 0.004902106687950436, "clip_ratio/high_mean": 0.0016658278836985119, "clip_ratio/low_mean": 0.0018290330372110475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003494860968203284, "epoch": 3.205307669874599, "grad_norm": 0.12011164426803589, "learning_rate": 1e-06, "loss": -0.0463, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 844.2154541015625, "completions/mean_terminated_length": 581.4053344726562, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.2076407115777195, "grad_norm": 0.31882616877555847, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 199564820.0, "reward": 0.5446428656578064, "reward_std": 0.12572482228279114, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1373 }, { "clip_ratio/high_max": 0.0031357479383586906, "clip_ratio/high_mean": 0.0010845147080544848, "clip_ratio/low_mean": 0.0009557826051604934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020402973023010418, "epoch": 3.20997375328084, "grad_norm": 0.24724893271923065, "learning_rate": 1e-06, "loss": -0.0319, "step": 1374 }, { "clip_ratio/high_max": 0.003972558755776845, "clip_ratio/high_mean": 0.0013608242552436423, "clip_ratio/low_mean": 0.0014136374957161024, "clip_ratio/low_min": 5.935422450420447e-05, "clip_ratio/region_mean": 0.0027744617327698506, "epoch": 3.2123067949839603, "grad_norm": 0.14517918229103088, "learning_rate": 1e-06, "loss": -0.0321, "step": 1375 }, { "clip_ratio/high_max": 0.004991469482774846, "clip_ratio/high_mean": 0.0017023847685777582, "clip_ratio/low_mean": 0.001928211260747048, "clip_ratio/low_min": 5.7612534874351695e-05, "clip_ratio/region_mean": 0.003630596023867838, "epoch": 3.214639836687081, "grad_norm": 0.11366204172372818, "learning_rate": 1e-06, "loss": -0.0323, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 926.2344360351562, "completions/mean_terminated_length": 619.733154296875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 3.216972878390201, "grad_norm": 0.3670353591442108, "learning_rate": 1e-06, "loss": -0.0187, "num_tokens": 200165926.0, "reward": 0.520089328289032, "reward_std": 0.17787852883338928, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1377 }, { "clip_ratio/high_max": 0.00304953216254944, "clip_ratio/high_mean": 0.0010785524427774362, "clip_ratio/low_mean": 0.0012442447914509103, "clip_ratio/low_min": 4.6912093239370733e-05, "clip_ratio/region_mean": 0.0023227972560562193, "epoch": 3.219305920093322, "grad_norm": 0.2190244495868683, "learning_rate": 1e-06, "loss": -0.0188, "step": 1378 }, { "clip_ratio/high_max": 0.004312402685172856, "clip_ratio/high_mean": 0.0014726780100318138, "clip_ratio/low_mean": 0.0019417062030697707, "clip_ratio/low_min": 4.441969940671697e-05, "clip_ratio/region_mean": 0.0034143841548939236, "epoch": 3.221638961796442, "grad_norm": 0.17341548204421997, "learning_rate": 1e-06, "loss": -0.0192, "step": 1379 }, { "clip_ratio/high_max": 0.004853286707657389, "clip_ratio/high_mean": 0.0017322526182397269, "clip_ratio/low_mean": 0.0025841287279035896, "clip_ratio/low_min": 0.0001008401668514125, "clip_ratio/region_mean": 0.004316381367971189, "epoch": 3.2239720034995627, "grad_norm": 0.11897829920053482, "learning_rate": 1e-06, "loss": -0.0194, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3173.0, "completions/mean_length": 816.8326416015625, "completions/mean_terminated_length": 560.33935546875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.226305045202683, "grad_norm": 0.3449660837650299, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 200715520.0, "reward": 0.6439732313156128, "reward_std": 0.17220768332481384, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1381 }, { "clip_ratio/high_max": 0.0033710048301145434, "clip_ratio/high_mean": 0.0012184613951831125, "clip_ratio/low_mean": 0.0009569141730025876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021753756154794246, "epoch": 3.2286380869058036, "grad_norm": 2.029808759689331, "learning_rate": 1e-06, "loss": -0.0099, "step": 1382 }, { "clip_ratio/high_max": 0.004771912164869718, "clip_ratio/high_mean": 0.001614162862097146, "clip_ratio/low_mean": 0.0013728646263189148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002987027466588188, "epoch": 3.2309711286089238, "grad_norm": 0.15824252367019653, "learning_rate": 1e-06, "loss": -0.0104, "step": 1383 }, { "clip_ratio/high_max": 0.005514569566003047, "clip_ratio/high_mean": 0.0019262970017734915, "clip_ratio/low_mean": 0.0018627353165356908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003789032271015458, "epoch": 3.2333041703120444, "grad_norm": 0.13315394520759583, "learning_rate": 1e-06, "loss": -0.0106, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 837.2734985351562, "completions/mean_terminated_length": 578.1458129882812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.2356372120151646, "grad_norm": 0.3086453974246979, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 201286613.0, "reward": 0.5859375, "reward_std": 0.13891373574733734, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1385 }, { "clip_ratio/high_max": 0.0024604897262179293, "clip_ratio/high_mean": 0.0009756838699104264, "clip_ratio/low_mean": 0.0008755841440688528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018512679962441325, "epoch": 3.2379702537182853, "grad_norm": 0.17661455273628235, "learning_rate": 1e-06, "loss": -0.0166, "step": 1386 }, { "clip_ratio/high_max": 0.0033661245979601517, "clip_ratio/high_mean": 0.0012950556483701803, "clip_ratio/low_mean": 0.001355676120510907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026507317961659282, "epoch": 3.2403032954214055, "grad_norm": 0.12449599802494049, "learning_rate": 1e-06, "loss": -0.0168, "step": 1387 }, { "clip_ratio/high_max": 0.0040850812729331665, "clip_ratio/high_mean": 0.0015817013663763646, "clip_ratio/low_mean": 0.0017979233307414688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003379624686203897, "epoch": 3.242636337124526, "grad_norm": 0.10471822321414948, "learning_rate": 1e-06, "loss": -0.017, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 753.0223388671875, "completions/mean_terminated_length": 521.6467895507812, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 3.2449693788276464, "grad_norm": 0.36556413769721985, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 201814953.0, "reward": 0.613839328289032, "reward_std": 0.17408309876918793, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1389 }, { "clip_ratio/high_max": 0.0031166108965408057, "clip_ratio/high_mean": 0.0012739974445139524, "clip_ratio/low_mean": 0.0009891142726701219, "clip_ratio/low_min": 6.630534517171327e-05, "clip_ratio/region_mean": 0.0022631117681157775, "epoch": 3.247302420530767, "grad_norm": 0.20463091135025024, "learning_rate": 1e-06, "loss": -0.0523, "step": 1390 }, { "clip_ratio/high_max": 0.0042093377851415426, "clip_ratio/high_mean": 0.0016955842183961067, "clip_ratio/low_mean": 0.001384314597089542, "clip_ratio/low_min": 5.586012593994383e-05, "clip_ratio/region_mean": 0.00307989890279714, "epoch": 3.249635462233887, "grad_norm": 0.14500582218170166, "learning_rate": 1e-06, "loss": -0.0526, "step": 1391 }, { "clip_ratio/high_max": 0.004939781210850924, "clip_ratio/high_mean": 0.0020117718013352714, "clip_ratio/low_mean": 0.0018955675113829784, "clip_ratio/low_min": 7.606944382132497e-05, "clip_ratio/region_mean": 0.003907339283614419, "epoch": 3.251968503937008, "grad_norm": 0.11729495972394943, "learning_rate": 1e-06, "loss": -0.0528, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 998.7969360351562, "completions/mean_terminated_length": 661.4777221679688, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 3.2543015456401285, "grad_norm": 0.3131501376628876, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 202444275.0, "reward": 0.5100446939468384, "reward_std": 0.1349353790283203, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1393 }, { "clip_ratio/high_max": 0.0026005674080806784, "clip_ratio/high_mean": 0.0008751800514801289, "clip_ratio/low_mean": 0.0010547618494456401, "clip_ratio/low_min": 4.679895209847018e-05, "clip_ratio/region_mean": 0.0019299418636364862, "epoch": 3.2566345873432487, "grad_norm": 0.19200493395328522, "learning_rate": 1e-06, "loss": -0.0276, "step": 1394 }, { "clip_ratio/high_max": 0.003681696056446526, "clip_ratio/high_mean": 0.0011712057603290305, "clip_ratio/low_mean": 0.0014548215294780675, "clip_ratio/low_min": 8.924928260967135e-05, "clip_ratio/region_mean": 0.002626027293445077, "epoch": 3.2589676290463694, "grad_norm": 0.15075527131557465, "learning_rate": 1e-06, "loss": -0.0279, "step": 1395 }, { "clip_ratio/high_max": 0.00443665240163682, "clip_ratio/high_mean": 0.0014224598726286786, "clip_ratio/low_mean": 0.001955044515852933, "clip_ratio/low_min": 0.00011365702812327072, "clip_ratio/region_mean": 0.003377504413947463, "epoch": 3.2613006707494896, "grad_norm": 0.11321279406547546, "learning_rate": 1e-06, "loss": -0.0281, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3205.0, "completions/mean_length": 899.3873291015625, "completions/mean_terminated_length": 585.993896484375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.2636337124526102, "grad_norm": 0.3386014699935913, "learning_rate": 1e-06, "loss": -0.0414, "num_tokens": 203014910.0, "reward": 0.5558035969734192, "reward_std": 0.15995845198631287, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1397 }, { "clip_ratio/high_max": 0.004114209703402594, "clip_ratio/high_mean": 0.0014585117787646595, "clip_ratio/low_mean": 0.0011556370845937636, "clip_ratio/low_min": 4.014129808638245e-05, "clip_ratio/region_mean": 0.0026141488342545927, "epoch": 3.2659667541557305, "grad_norm": 0.22558808326721191, "learning_rate": 1e-06, "loss": -0.0415, "step": 1398 }, { "clip_ratio/high_max": 0.004943598658428527, "clip_ratio/high_mean": 0.0018776509168674238, "clip_ratio/low_mean": 0.0016788302673376165, "clip_ratio/low_min": 5.4187617934076115e-05, "clip_ratio/region_mean": 0.00355648115510121, "epoch": 3.268299795858851, "grad_norm": 0.15932244062423706, "learning_rate": 1e-06, "loss": -0.0418, "step": 1399 }, { "clip_ratio/high_max": 0.005976288142846897, "clip_ratio/high_mean": 0.00227412031381391, "clip_ratio/low_mean": 0.002304413756064605, "clip_ratio/low_min": 5.4187617934076115e-05, "clip_ratio/region_mean": 0.004578534149914049, "epoch": 3.2706328375619713, "grad_norm": 0.1289454698562622, "learning_rate": 1e-06, "loss": -0.0421, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3265.0, "completions/mean_length": 977.9085083007812, "completions/mean_terminated_length": 568.4620971679688, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 3.272965879265092, "grad_norm": 0.33727511763572693, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 203551788.0, "reward": 0.5412946939468384, "reward_std": 0.15883009135723114, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1401 }, { "clip_ratio/high_max": 0.0030714334207004867, "clip_ratio/high_mean": 0.001175930194222019, "clip_ratio/low_mean": 0.0009320073804701678, "clip_ratio/low_min": 2.439976560708601e-05, "clip_ratio/region_mean": 0.00210793756559724, "epoch": 3.275298920968212, "grad_norm": 0.20726364850997925, "learning_rate": 1e-06, "loss": -0.0333, "step": 1402 }, { "clip_ratio/high_max": 0.0037807005282957107, "clip_ratio/high_mean": 0.001510149275418371, "clip_ratio/low_mean": 0.0014118088583927602, "clip_ratio/low_min": 4.9115915317088366e-05, "clip_ratio/region_mean": 0.002921958133811131, "epoch": 3.277631962671333, "grad_norm": 0.15066851675510406, "learning_rate": 1e-06, "loss": -0.0337, "step": 1403 }, { "clip_ratio/high_max": 0.004522729192103725, "clip_ratio/high_mean": 0.0018541261706559453, "clip_ratio/low_mean": 0.001956508265720913, "clip_ratio/low_min": 6.139489414636046e-05, "clip_ratio/region_mean": 0.003810634429100901, "epoch": 3.279965004374453, "grad_norm": 0.11625725775957108, "learning_rate": 1e-06, "loss": -0.0339, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 1078.7734375, "completions/mean_terminated_length": 678.2566528320312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.2822980460775737, "grad_norm": 0.3054969906806946, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 204182937.0, "reward": 0.5491071939468384, "reward_std": 0.17946292459964752, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1405 }, { "clip_ratio/high_max": 0.002638317171658855, "clip_ratio/high_mean": 0.001206036258736276, "clip_ratio/low_mean": 0.0009100254465010948, "clip_ratio/low_min": 2.77654380624881e-05, "clip_ratio/region_mean": 0.002116061652486678, "epoch": 3.284631087780694, "grad_norm": 0.19915546476840973, "learning_rate": 1e-06, "loss": -0.0624, "step": 1406 }, { "clip_ratio/high_max": 0.0035082350004813634, "clip_ratio/high_mean": 0.0015448228623426985, "clip_ratio/low_mean": 0.001241959209437482, "clip_ratio/low_min": 1.0352824574511033e-05, "clip_ratio/region_mean": 0.0027867820390383713, "epoch": 3.2869641294838146, "grad_norm": 0.15233661234378815, "learning_rate": 1e-06, "loss": -0.0627, "step": 1407 }, { "clip_ratio/high_max": 0.0044437361575546674, "clip_ratio/high_mean": 0.0019676476549648214, "clip_ratio/low_mean": 0.0017435058325645514, "clip_ratio/low_min": 2.77654380624881e-05, "clip_ratio/region_mean": 0.003711153505719267, "epoch": 3.289297171186935, "grad_norm": 0.11581484973430634, "learning_rate": 1e-06, "loss": -0.063, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 906.0379638671875, "completions/mean_terminated_length": 580.3714599609375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 3.2916302128900554, "grad_norm": 0.39846235513687134, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 204748299.0, "reward": 0.6160714626312256, "reward_std": 0.1782187521457672, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1409 }, { "clip_ratio/high_max": 0.004705637802544516, "clip_ratio/high_mean": 0.0016721362590033095, "clip_ratio/low_mean": 0.0012411901989253238, "clip_ratio/low_min": 7.211294177977834e-05, "clip_ratio/region_mean": 0.002913326519774273, "epoch": 3.2939632545931756, "grad_norm": 0.2337198257446289, "learning_rate": 1e-06, "loss": -0.0345, "step": 1410 }, { "clip_ratio/high_max": 0.00500419270247221, "clip_ratio/high_mean": 0.0019764383614528924, "clip_ratio/low_mean": 0.0019082672806689516, "clip_ratio/low_min": 7.465260023309384e-05, "clip_ratio/region_mean": 0.003884705700329505, "epoch": 3.2962962962962963, "grad_norm": 0.17529748380184174, "learning_rate": 1e-06, "loss": -0.0349, "step": 1411 }, { "clip_ratio/high_max": 0.006356454556225799, "clip_ratio/high_mean": 0.0025016561412485316, "clip_ratio/low_mean": 0.0024054414170677774, "clip_ratio/low_min": 0.0001521948324807454, "clip_ratio/region_mean": 0.004907097551040351, "epoch": 3.298629337999417, "grad_norm": 0.1319684535264969, "learning_rate": 1e-06, "loss": -0.0351, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 1031.6953125, "completions/mean_terminated_length": 607.2871704101562, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.300962379702537, "grad_norm": 0.35005077719688416, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 205317154.0, "reward": 0.5301339626312256, "reward_std": 0.1779993623495102, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1413 }, { "clip_ratio/high_max": 0.003467716771410778, "clip_ratio/high_mean": 0.0011956593916693237, "clip_ratio/low_mean": 0.001207520939715323, "clip_ratio/low_min": 2.8383288736222312e-05, "clip_ratio/region_mean": 0.002403180376859382, "epoch": 3.303295421405658, "grad_norm": 0.23282280564308167, "learning_rate": 1e-06, "loss": -0.0307, "step": 1414 }, { "clip_ratio/high_max": 0.004292180441552773, "clip_ratio/high_mean": 0.0016027106285037007, "clip_ratio/low_mean": 0.001779931004421087, "clip_ratio/low_min": 5.6766577472444624e-05, "clip_ratio/region_mean": 0.0033826415601652116, "epoch": 3.305628463108778, "grad_norm": 0.1666043996810913, "learning_rate": 1e-06, "loss": -0.0311, "step": 1415 }, { "clip_ratio/high_max": 0.00520891638007015, "clip_ratio/high_mean": 0.002007245784625411, "clip_ratio/low_mean": 0.0023729883359919768, "clip_ratio/low_min": 4.2574931285344064e-05, "clip_ratio/region_mean": 0.00438023399328813, "epoch": 3.3079615048118987, "grad_norm": 0.12847065925598145, "learning_rate": 1e-06, "loss": -0.0314, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 726.8750610351562, "completions/mean_terminated_length": 531.9669189453125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.310294546515019, "grad_norm": 0.34665900468826294, "learning_rate": 1e-06, "loss": -0.0311, "num_tokens": 205845098.0, "reward": 0.6830357313156128, "reward_std": 0.14669214189052582, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1417 }, { "clip_ratio/high_max": 0.0033391896067769267, "clip_ratio/high_mean": 0.0012624697665160056, "clip_ratio/low_mean": 0.0009894743725453736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002251944111776538, "epoch": 3.3126275882181395, "grad_norm": 0.35060036182403564, "learning_rate": 1e-06, "loss": -0.0313, "step": 1418 }, { "clip_ratio/high_max": 0.004154072317760438, "clip_ratio/high_mean": 0.0016473334981128573, "clip_ratio/low_mean": 0.0014225789200281724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030699124035891145, "epoch": 3.3149606299212597, "grad_norm": 0.15302026271820068, "learning_rate": 1e-06, "loss": -0.0315, "step": 1419 }, { "clip_ratio/high_max": 0.005012642403016798, "clip_ratio/high_mean": 0.002029568400757853, "clip_ratio/low_mean": 0.00190137911340571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003930947510525584, "epoch": 3.3172936716243804, "grad_norm": 0.11411917209625244, "learning_rate": 1e-06, "loss": -0.0318, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 866.458740234375, "completions/mean_terminated_length": 601.2306518554688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.3196267133275006, "grad_norm": 0.3669516146183014, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 206433677.0, "reward": 0.6417410969734192, "reward_std": 0.18799060583114624, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1421 }, { "clip_ratio/high_max": 0.0038432006767834537, "clip_ratio/high_mean": 0.0015284072942449711, "clip_ratio/low_mean": 0.0013510972858057357, "clip_ratio/low_min": 3.9275240851566195e-05, "clip_ratio/region_mean": 0.0028795045873266645, "epoch": 3.3219597550306212, "grad_norm": 0.41116538643836975, "learning_rate": 1e-06, "loss": -0.0306, "step": 1422 }, { "clip_ratio/high_max": 0.004401103236887138, "clip_ratio/high_mean": 0.0018015752393694129, "clip_ratio/low_mean": 0.00215124577152892, "clip_ratio/low_min": 6.545873475261033e-05, "clip_ratio/region_mean": 0.003952821003622375, "epoch": 3.3242927967337415, "grad_norm": 0.18180954456329346, "learning_rate": 1e-06, "loss": -0.0309, "step": 1423 }, { "clip_ratio/high_max": 0.0051908153254771605, "clip_ratio/high_mean": 0.0021664574596798047, "clip_ratio/low_mean": 0.0027612549893092364, "clip_ratio/low_min": 7.855048170313239e-05, "clip_ratio/region_mean": 0.0049277124053332955, "epoch": 3.326625838436862, "grad_norm": 0.14316900074481964, "learning_rate": 1e-06, "loss": -0.0312, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3156.0, "completions/mean_length": 991.5770263671875, "completions/mean_terminated_length": 649.2056884765625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.3289588801399823, "grad_norm": 0.2947278320789337, "learning_rate": 1e-06, "loss": -0.0282, "num_tokens": 207054690.0, "reward": 0.5602678656578064, "reward_std": 0.15797244012355804, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1425 }, { "clip_ratio/high_max": 0.0026212315351585858, "clip_ratio/high_mean": 0.001033810613080277, "clip_ratio/low_mean": 0.0008613563650214928, "clip_ratio/low_min": 6.58662902424112e-05, "clip_ratio/region_mean": 0.001895166980830254, "epoch": 3.331291921843103, "grad_norm": 0.2175280600786209, "learning_rate": 1e-06, "loss": -0.0283, "step": 1426 }, { "clip_ratio/high_max": 0.0035361613336135633, "clip_ratio/high_mean": 0.001332651039774646, "clip_ratio/low_mean": 0.001323376432992518, "clip_ratio/low_min": 0.00012104942288715392, "clip_ratio/region_mean": 0.0026560274709481746, "epoch": 3.3336249635462236, "grad_norm": 0.1329278200864792, "learning_rate": 1e-06, "loss": -0.0286, "step": 1427 }, { "clip_ratio/high_max": 0.0041221589053748176, "clip_ratio/high_mean": 0.0016122859651659383, "clip_ratio/low_mean": 0.0017155832301796181, "clip_ratio/low_min": 0.00013530834621633403, "clip_ratio/region_mean": 0.0033278690752922557, "epoch": 3.335958005249344, "grad_norm": 0.10869111120700836, "learning_rate": 1e-06, "loss": -0.0288, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 941.65185546875, "completions/mean_terminated_length": 567.5405883789062, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.338291046952464, "grad_norm": 0.35883671045303345, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 207593058.0, "reward": 0.6194196939468384, "reward_std": 0.16134923696517944, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1429 }, { "clip_ratio/high_max": 0.003240909645683132, "clip_ratio/high_mean": 0.0011943110475840513, "clip_ratio/low_mean": 0.000959783916187007, "clip_ratio/low_min": 3.7639263609889895e-05, "clip_ratio/region_mean": 0.0021540949819609523, "epoch": 3.3406240886555847, "grad_norm": 0.20617221295833588, "learning_rate": 1e-06, "loss": -0.0474, "step": 1430 }, { "clip_ratio/high_max": 0.004158303876465652, "clip_ratio/high_mean": 0.0014842696691630408, "clip_ratio/low_mean": 0.0014071289915591478, "clip_ratio/low_min": 1.8819631804944947e-05, "clip_ratio/region_mean": 0.0028913986971019767, "epoch": 3.3429571303587053, "grad_norm": 0.1464354544878006, "learning_rate": 1e-06, "loss": -0.0477, "step": 1431 }, { "clip_ratio/high_max": 0.004969096029526554, "clip_ratio/high_mean": 0.0018096721432812046, "clip_ratio/low_mean": 0.0019547070041880943, "clip_ratio/low_min": 6.583567301277071e-05, "clip_ratio/region_mean": 0.0037643791001755744, "epoch": 3.3452901720618256, "grad_norm": 0.11336461454629898, "learning_rate": 1e-06, "loss": -0.048, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 1048.1529541015625, "completions/mean_terminated_length": 621.6094360351562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.347623213764946, "grad_norm": 0.33480337262153625, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 208176363.0, "reward": 0.5189732313156128, "reward_std": 0.16078399121761322, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1433 }, { "clip_ratio/high_max": 0.003671175909403246, "clip_ratio/high_mean": 0.0010798605380841764, "clip_ratio/low_mean": 0.0009517449743725592, "clip_ratio/low_min": 4.0320494008483365e-05, "clip_ratio/region_mean": 0.0020316055379225872, "epoch": 3.3499562554680664, "grad_norm": 0.2044934779405594, "learning_rate": 1e-06, "loss": -0.0498, "step": 1434 }, { "clip_ratio/high_max": 0.0044436264943215065, "clip_ratio/high_mean": 0.0014202195779944304, "clip_ratio/low_mean": 0.0014911838243278908, "clip_ratio/low_min": 6.198427217896096e-05, "clip_ratio/region_mean": 0.00291140337503748, "epoch": 3.352289297171187, "grad_norm": 0.13736048340797424, "learning_rate": 1e-06, "loss": -0.0502, "step": 1435 }, { "clip_ratio/high_max": 0.005500689563632477, "clip_ratio/high_mean": 0.0016777292112237774, "clip_ratio/low_mean": 0.0019432822227827273, "clip_ratio/low_min": 0.00010230476618744433, "clip_ratio/region_mean": 0.003621011484938208, "epoch": 3.3546223388743073, "grad_norm": 0.11899946630001068, "learning_rate": 1e-06, "loss": -0.0503, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 819.3582763671875, "completions/mean_terminated_length": 567.3088989257812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.356955380577428, "grad_norm": 0.2872629463672638, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 208739276.0, "reward": 0.6238839626312256, "reward_std": 0.12253601104021072, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 1437 }, { "clip_ratio/high_max": 0.00247236941504525, "clip_ratio/high_mean": 0.0009216337421094067, "clip_ratio/low_mean": 0.0008784023339103442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001800036094209645, "epoch": 3.359288422280548, "grad_norm": 0.27788272500038147, "learning_rate": 1e-06, "loss": -0.002, "step": 1438 }, { "clip_ratio/high_max": 0.0032082588295452297, "clip_ratio/high_mean": 0.001179328879516106, "clip_ratio/low_mean": 0.0013297901678015478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025091190327657387, "epoch": 3.361621463983669, "grad_norm": 0.13105103373527527, "learning_rate": 1e-06, "loss": -0.0023, "step": 1439 }, { "clip_ratio/high_max": 0.0037512202106881887, "clip_ratio/high_mean": 0.0013395827591011766, "clip_ratio/low_mean": 0.0017740178082021885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031136005782173015, "epoch": 3.363954505686789, "grad_norm": 0.09995643049478531, "learning_rate": 1e-06, "loss": -0.0024, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 942.8739013671875, "completions/mean_terminated_length": 551.2057495117188, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 3.3662875473899097, "grad_norm": 0.3165186941623688, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 209279307.0, "reward": 0.6495535969734192, "reward_std": 0.17400500178337097, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.47737622261047363, "step": 1441 }, { "clip_ratio/high_max": 0.003067974110308569, "clip_ratio/high_mean": 0.0013276108365971595, "clip_ratio/low_mean": 0.0009673998683865648, "clip_ratio/low_min": 4.8423840780742466e-05, "clip_ratio/region_mean": 0.0022950106867938302, "epoch": 3.36862058909303, "grad_norm": 0.20929716527462006, "learning_rate": 1e-06, "loss": -0.048, "step": 1442 }, { "clip_ratio/high_max": 0.004002856090664864, "clip_ratio/high_mean": 0.0016889465259737335, "clip_ratio/low_mean": 0.0014540184674842749, "clip_ratio/low_min": 5.6253239563375246e-05, "clip_ratio/region_mean": 0.0031429650116479024, "epoch": 3.3709536307961505, "grad_norm": 0.1491130143404007, "learning_rate": 1e-06, "loss": -0.0484, "step": 1443 }, { "clip_ratio/high_max": 0.004669924601330422, "clip_ratio/high_mean": 0.0020170616699033417, "clip_ratio/low_mean": 0.0019806952150247525, "clip_ratio/low_min": 0.0001367331615256262, "clip_ratio/region_mean": 0.0039977568667382, "epoch": 3.3732866724992707, "grad_norm": 0.12539853155612946, "learning_rate": 1e-06, "loss": -0.0486, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2892.0, "completions/mean_length": 806.6842041015625, "completions/mean_terminated_length": 523.6036376953125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 3.3756197142023914, "grad_norm": 0.35378989577293396, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 209797432.0, "reward": 0.684151828289032, "reward_std": 0.16773684322834015, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.46511244773864746, "step": 1445 }, { "clip_ratio/high_max": 0.0034761690913001075, "clip_ratio/high_mean": 0.0012960876083525363, "clip_ratio/low_mean": 0.0009990680337068625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002295155674801208, "epoch": 3.377952755905512, "grad_norm": 0.22234691679477692, "learning_rate": 1e-06, "loss": -0.0481, "step": 1446 }, { "clip_ratio/high_max": 0.00445524750102777, "clip_ratio/high_mean": 0.00169015923893312, "clip_ratio/low_mean": 0.0015749493759358302, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032651086658006534, "epoch": 3.3802857976086322, "grad_norm": 0.15491150319576263, "learning_rate": 1e-06, "loss": -0.0485, "step": 1447 }, { "clip_ratio/high_max": 0.005324508863850497, "clip_ratio/high_mean": 0.002126934668922331, "clip_ratio/low_mean": 0.0021582389672403224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0042851736943703145, "epoch": 3.382618839311753, "grad_norm": 0.11977016925811768, "learning_rate": 1e-06, "loss": -0.0487, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 846.700927734375, "completions/mean_terminated_length": 613.49755859375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 3.384951881014873, "grad_norm": 0.32891198992729187, "learning_rate": 1e-06, "loss": -0.0311, "num_tokens": 210400412.0, "reward": 0.5636160969734192, "reward_std": 0.16596952080726624, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1449 }, { "clip_ratio/high_max": 0.003074590265896404, "clip_ratio/high_mean": 0.001189088406135852, "clip_ratio/low_mean": 0.001008239947623224, "clip_ratio/low_min": 5.537098695640452e-05, "clip_ratio/region_mean": 0.002197328365582507, "epoch": 3.3872849227179938, "grad_norm": 0.38472697138786316, "learning_rate": 1e-06, "loss": -0.0312, "step": 1450 }, { "clip_ratio/high_max": 0.004076700861332938, "clip_ratio/high_mean": 0.0015201837304630317, "clip_ratio/low_mean": 0.0015714692672190722, "clip_ratio/low_min": 8.305647497763857e-05, "clip_ratio/region_mean": 0.0030916530013200827, "epoch": 3.389617964421114, "grad_norm": 0.1505495011806488, "learning_rate": 1e-06, "loss": -0.0315, "step": 1451 }, { "clip_ratio/high_max": 0.004655471590012894, "clip_ratio/high_mean": 0.0017973316512325255, "clip_ratio/low_mean": 0.0020679986519098748, "clip_ratio/low_min": 0.00012458472338039428, "clip_ratio/region_mean": 0.003865330247208476, "epoch": 3.3919510061242346, "grad_norm": 0.11179495602846146, "learning_rate": 1e-06, "loss": -0.0318, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 912.8750610351562, "completions/mean_terminated_length": 566.197998046875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.394284047827355, "grad_norm": 0.33668947219848633, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 210955748.0, "reward": 0.5558035969734192, "reward_std": 0.1551848202943802, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1453 }, { "clip_ratio/high_max": 0.003294939153420273, "clip_ratio/high_mean": 0.0011015737400157377, "clip_ratio/low_mean": 0.0013778659667877946, "clip_ratio/low_min": 1.4667918549093883e-05, "clip_ratio/region_mean": 0.0024794397249934264, "epoch": 3.3966170895304755, "grad_norm": 0.2266514003276825, "learning_rate": 1e-06, "loss": -0.0277, "step": 1454 }, { "clip_ratio/high_max": 0.003977893458795734, "clip_ratio/high_mean": 0.0013247321658127476, "clip_ratio/low_mean": 0.0019285795206087641, "clip_ratio/low_min": 2.9335837098187767e-05, "clip_ratio/region_mean": 0.0032533117046114057, "epoch": 3.3989501312335957, "grad_norm": 0.2405986785888672, "learning_rate": 1e-06, "loss": -0.028, "step": 1455 }, { "clip_ratio/high_max": 0.004819168447284028, "clip_ratio/high_mean": 0.0016370704397559166, "clip_ratio/low_mean": 0.0025394363765371963, "clip_ratio/low_min": 5.8671674196375534e-05, "clip_ratio/region_mean": 0.004176506903604604, "epoch": 3.4012831729367163, "grad_norm": 0.12424448877573013, "learning_rate": 1e-06, "loss": -0.0283, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 896.3895263671875, "completions/mean_terminated_length": 616.8118896484375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.4036162146398365, "grad_norm": 0.3258913457393646, "learning_rate": 1e-06, "loss": -0.0208, "num_tokens": 211548961.0, "reward": 0.6104910969734192, "reward_std": 0.1498834788799286, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1457 }, { "clip_ratio/high_max": 0.0029209689964773133, "clip_ratio/high_mean": 0.0011272691172052873, "clip_ratio/low_mean": 0.0009223267925335676, "clip_ratio/low_min": 4.8449612222611904e-05, "clip_ratio/region_mean": 0.002049595907010371, "epoch": 3.405949256342957, "grad_norm": 0.22020293772220612, "learning_rate": 1e-06, "loss": -0.0209, "step": 1458 }, { "clip_ratio/high_max": 0.003634042768680956, "clip_ratio/high_mean": 0.0014793877708143555, "clip_ratio/low_mean": 0.0013879891030228464, "clip_ratio/low_min": 5.683884955942631e-05, "clip_ratio/region_mean": 0.002867376824724488, "epoch": 3.4082822980460774, "grad_norm": 0.16483503580093384, "learning_rate": 1e-06, "loss": -0.0212, "step": 1459 }, { "clip_ratio/high_max": 0.004426344617968425, "clip_ratio/high_mean": 0.0017894255433930084, "clip_ratio/low_mean": 0.0017640381847741082, "clip_ratio/low_min": 9.094215783989057e-05, "clip_ratio/region_mean": 0.0035534638082026504, "epoch": 3.410615339749198, "grad_norm": 0.11736679077148438, "learning_rate": 1e-06, "loss": -0.0213, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 897.6641235351562, "completions/mean_terminated_length": 630.8137817382812, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.4129483814523183, "grad_norm": 0.370855450630188, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 212172644.0, "reward": 0.5066964626312256, "reward_std": 0.19310013949871063, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 1461 }, { "clip_ratio/high_max": 0.0035387179304962046, "clip_ratio/high_mean": 0.0013630581543111475, "clip_ratio/low_mean": 0.0013434040738502517, "clip_ratio/low_min": 7.795067176630255e-05, "clip_ratio/region_mean": 0.0027064621172030456, "epoch": 3.415281423155439, "grad_norm": 1.0494226217269897, "learning_rate": 1e-06, "loss": -0.0292, "step": 1462 }, { "clip_ratio/high_max": 0.0037899578383076005, "clip_ratio/high_mean": 0.0015883591840974987, "clip_ratio/low_mean": 0.002000364926061593, "clip_ratio/low_min": 0.00012135546967328992, "clip_ratio/region_mean": 0.0035887241683667526, "epoch": 3.417614464858559, "grad_norm": 0.28826481103897095, "learning_rate": 1e-06, "loss": -0.0297, "step": 1463 }, { "clip_ratio/high_max": 0.004639524100639392, "clip_ratio/high_mean": 0.0019227886987209786, "clip_ratio/low_mean": 0.002635526267113164, "clip_ratio/low_min": 0.0001897072452266002, "clip_ratio/region_mean": 0.004558314962196164, "epoch": 3.41994750656168, "grad_norm": 0.18610695004463196, "learning_rate": 1e-06, "loss": -0.0299, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3481.0, "completions/mean_length": 910.2891235351562, "completions/mean_terminated_length": 615.028076171875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 3.4222805482648004, "grad_norm": 0.3070477247238159, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 212775175.0, "reward": 0.5703125, "reward_std": 0.14184364676475525, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1465 }, { "clip_ratio/high_max": 0.0033100838845712133, "clip_ratio/high_mean": 0.0010978324917232385, "clip_ratio/low_mean": 0.0008103427971946076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001908175203425344, "epoch": 3.4246135899679206, "grad_norm": 0.21276326477527618, "learning_rate": 1e-06, "loss": -0.0328, "step": 1466 }, { "clip_ratio/high_max": 0.004178324874374084, "clip_ratio/high_mean": 0.0013756191401625983, "clip_ratio/low_mean": 0.0012445839838619577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026202031294815242, "epoch": 3.4269466316710413, "grad_norm": 0.14450976252555847, "learning_rate": 1e-06, "loss": -0.0331, "step": 1467 }, { "clip_ratio/high_max": 0.005303017082042061, "clip_ratio/high_mean": 0.0017012455136864446, "clip_ratio/low_mean": 0.0016832017463457305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003384447278222069, "epoch": 3.4292796733741615, "grad_norm": 0.1098371222615242, "learning_rate": 1e-06, "loss": -0.0333, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 734.9442138671875, "completions/mean_terminated_length": 557.2150268554688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.431612715077282, "grad_norm": 0.32140618562698364, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 213333445.0, "reward": 0.6618303656578064, "reward_std": 0.1396312266588211, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1469 }, { "clip_ratio/high_max": 0.0031718998397991527, "clip_ratio/high_mean": 0.001155943376943469, "clip_ratio/low_mean": 0.0009701974013296422, "clip_ratio/low_min": 3.637143163359724e-05, "clip_ratio/region_mean": 0.0021261407964630052, "epoch": 3.4339457567804024, "grad_norm": 0.19846093654632568, "learning_rate": 1e-06, "loss": -0.0201, "step": 1470 }, { "clip_ratio/high_max": 0.004122487240238115, "clip_ratio/high_mean": 0.0014899682864779606, "clip_ratio/low_mean": 0.0013824990892317146, "clip_ratio/low_min": 5.624045297736302e-05, "clip_ratio/region_mean": 0.0028724673684337176, "epoch": 3.436278798483523, "grad_norm": 0.1433844119310379, "learning_rate": 1e-06, "loss": -0.0204, "step": 1471 }, { "clip_ratio/high_max": 0.00484548466920387, "clip_ratio/high_mean": 0.0018221103309770115, "clip_ratio/low_mean": 0.0017921788894454949, "clip_ratio/low_min": 9.184432929032482e-05, "clip_ratio/region_mean": 0.0036142892204225063, "epoch": 3.4386118401866432, "grad_norm": 0.11969095468521118, "learning_rate": 1e-06, "loss": -0.0206, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3397.0, "completions/mean_length": 954.9967041015625, "completions/mean_terminated_length": 599.9266967773438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.440944881889764, "grad_norm": 0.328739196062088, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 213916738.0, "reward": 0.5680803656578064, "reward_std": 0.15575185418128967, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1473 }, { "clip_ratio/high_max": 0.003649689184385352, "clip_ratio/high_mean": 0.0012720645318040624, "clip_ratio/low_mean": 0.001012951739539858, "clip_ratio/low_min": 5.3410303735290654e-05, "clip_ratio/region_mean": 0.0022850162931717932, "epoch": 3.443277923592884, "grad_norm": 273.7433776855469, "learning_rate": 1e-06, "loss": -0.0261, "step": 1474 }, { "clip_ratio/high_max": 0.004247971621225588, "clip_ratio/high_mean": 0.0015598472382407635, "clip_ratio/low_mean": 0.0013763205915893195, "clip_ratio/low_min": 0.00011065370927099138, "clip_ratio/region_mean": 0.0029361678170971572, "epoch": 3.4456109652960047, "grad_norm": 75374.2578125, "learning_rate": 1e-06, "loss": 14.544, "step": 1475 }, { "clip_ratio/high_max": 0.0047157242661342025, "clip_ratio/high_mean": 0.001658662633417407, "clip_ratio/low_mean": 0.0016832537912705448, "clip_ratio/low_min": 0.00011028812514268793, "clip_ratio/region_mean": 0.00334191630827263, "epoch": 3.447944006999125, "grad_norm": 80.58850860595703, "learning_rate": 1e-06, "loss": -0.0227, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 956.6574096679688, "completions/mean_terminated_length": 584.3258056640625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 3.4502770487022456, "grad_norm": 0.3226136267185211, "learning_rate": 1e-06, "loss": -0.0615, "num_tokens": 214470647.0, "reward": 0.5803571939468384, "reward_std": 0.16597019135951996, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1477 }, { "clip_ratio/high_max": 0.0038060480801505037, "clip_ratio/high_mean": 0.0015814549678907497, "clip_ratio/low_mean": 0.0007737272808299167, "clip_ratio/low_min": 4.3192812881898135e-05, "clip_ratio/region_mean": 0.002355182255996624, "epoch": 3.452610090405366, "grad_norm": 0.19761905074119568, "learning_rate": 1e-06, "loss": -0.0616, "step": 1478 }, { "clip_ratio/high_max": 0.004704038110503461, "clip_ratio/high_mean": 0.0019194007472833619, "clip_ratio/low_mean": 0.0011096977868874092, "clip_ratio/low_min": 4.544352850643918e-05, "clip_ratio/region_mean": 0.00302909853780875, "epoch": 3.4549431321084865, "grad_norm": 0.152030810713768, "learning_rate": 1e-06, "loss": -0.0619, "step": 1479 }, { "clip_ratio/high_max": 0.005573749367613345, "clip_ratio/high_mean": 0.0023625228168384638, "clip_ratio/low_mean": 0.0014885508644510992, "clip_ratio/low_min": 6.47892156848684e-05, "clip_ratio/region_mean": 0.0038510737067554146, "epoch": 3.457276173811607, "grad_norm": 0.11391009390354156, "learning_rate": 1e-06, "loss": -0.0621, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 836.1038208007812, "completions/mean_terminated_length": 529.6178588867188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.4596092155147273, "grad_norm": 0.4045262336730957, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 214989612.0, "reward": 0.6339285969734192, "reward_std": 0.15744180977344513, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 1481 }, { "clip_ratio/high_max": 0.0031986507710826118, "clip_ratio/high_mean": 0.0012433804058673559, "clip_ratio/low_mean": 0.000930545242226799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002173925638999208, "epoch": 3.4619422572178475, "grad_norm": 0.22097234427928925, "learning_rate": 1e-06, "loss": -0.0547, "step": 1482 }, { "clip_ratio/high_max": 0.004338504841143731, "clip_ratio/high_mean": 0.0017156364483525977, "clip_ratio/low_mean": 0.0013897570697736228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003105393498117337, "epoch": 3.464275298920968, "grad_norm": 0.15404903888702393, "learning_rate": 1e-06, "loss": -0.055, "step": 1483 }, { "clip_ratio/high_max": 0.005220797283982392, "clip_ratio/high_mean": 0.0020994475853512995, "clip_ratio/low_mean": 0.0017317217225354398, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038311692951538134, "epoch": 3.466608340624089, "grad_norm": 0.11643924564123154, "learning_rate": 1e-06, "loss": -0.0552, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3785.0, "completions/mean_length": 941.5614013671875, "completions/mean_terminated_length": 619.521484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.468941382327209, "grad_norm": 0.3651109039783478, "learning_rate": 1e-06, "loss": -0.0539, "num_tokens": 215589275.0, "reward": 0.590401828289032, "reward_std": 0.219545379281044, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1485 }, { "clip_ratio/high_max": 0.003997388339485042, "clip_ratio/high_mean": 0.0016932856124185491, "clip_ratio/low_mean": 0.001410522760124877, "clip_ratio/low_min": 0.00011898059619852575, "clip_ratio/region_mean": 0.003103808434389066, "epoch": 3.4712744240303297, "grad_norm": 0.29079344868659973, "learning_rate": 1e-06, "loss": -0.0539, "step": 1486 }, { "clip_ratio/high_max": 0.005061510557425208, "clip_ratio/high_mean": 0.0022223667401704006, "clip_ratio/low_mean": 0.0020996379098505713, "clip_ratio/low_min": 0.00021167069007788086, "clip_ratio/region_mean": 0.004322004650020972, "epoch": 3.47360746573345, "grad_norm": 0.1900169402360916, "learning_rate": 1e-06, "loss": -0.0544, "step": 1487 }, { "clip_ratio/high_max": 0.005820676567964256, "clip_ratio/high_mean": 0.002693339622055646, "clip_ratio/low_mean": 0.002884810201067012, "clip_ratio/low_min": 0.0003068130408792058, "clip_ratio/region_mean": 0.005578149750363082, "epoch": 3.4759405074365706, "grad_norm": 0.1543101966381073, "learning_rate": 1e-06, "loss": -0.0547, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 950.4553833007812, "completions/mean_terminated_length": 586.1519165039062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.478273549139691, "grad_norm": 0.2818315327167511, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 216149323.0, "reward": 0.582589328289032, "reward_std": 0.14263570308685303, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 1489 }, { "clip_ratio/high_max": 0.0026045147715194616, "clip_ratio/high_mean": 0.0008903658726922004, "clip_ratio/low_mean": 0.0008223110580729553, "clip_ratio/low_min": 1.9747234546230175e-05, "clip_ratio/region_mean": 0.0017126769380411133, "epoch": 3.4806065908428114, "grad_norm": 0.19161909818649292, "learning_rate": 1e-06, "loss": -0.0433, "step": 1490 }, { "clip_ratio/high_max": 0.0033749561553122476, "clip_ratio/high_mean": 0.0012479197775974171, "clip_ratio/low_mean": 0.001170964475022629, "clip_ratio/low_min": 2.9620852728839964e-05, "clip_ratio/region_mean": 0.0024188842726289295, "epoch": 3.4829396325459316, "grad_norm": 0.13932949304580688, "learning_rate": 1e-06, "loss": -0.0435, "step": 1491 }, { "clip_ratio/high_max": 0.0038341688050422817, "clip_ratio/high_mean": 0.0014365725655807182, "clip_ratio/low_mean": 0.0015721184427093249, "clip_ratio/low_min": 2.9620852728839964e-05, "clip_ratio/region_mean": 0.0030086909682722762, "epoch": 3.4852726742490523, "grad_norm": 0.10930213332176208, "learning_rate": 1e-06, "loss": -0.0437, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 1067.4296875, "completions/mean_terminated_length": 590.0581665039062, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.4876057159521725, "grad_norm": 0.3481297194957733, "learning_rate": 1e-06, "loss": -0.0687, "num_tokens": 216703612.0, "reward": 0.5714285969734192, "reward_std": 0.15729236602783203, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1493 }, { "clip_ratio/high_max": 0.003634344437159598, "clip_ratio/high_mean": 0.0013998175963934045, "clip_ratio/low_mean": 0.0010584617957647424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002458279428537935, "epoch": 3.489938757655293, "grad_norm": 0.25154000520706177, "learning_rate": 1e-06, "loss": -0.0689, "step": 1494 }, { "clip_ratio/high_max": 0.004598041850840673, "clip_ratio/high_mean": 0.0017823240559664555, "clip_ratio/low_mean": 0.0015657033291063271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003348027341417037, "epoch": 3.4922717993584134, "grad_norm": 0.15990762412548065, "learning_rate": 1e-06, "loss": -0.0692, "step": 1495 }, { "clip_ratio/high_max": 0.005470540898386389, "clip_ratio/high_mean": 0.0021713277892558835, "clip_ratio/low_mean": 0.0020834994429606013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00425482721766457, "epoch": 3.494604841061534, "grad_norm": 0.12317564338445663, "learning_rate": 1e-06, "loss": -0.0695, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3575.0, "completions/mean_length": 957.8694458007812, "completions/mean_terminated_length": 590.05859375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 3.4969378827646542, "grad_norm": 0.2687775194644928, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 217254815.0, "reward": 0.5558035969734192, "reward_std": 0.13786065578460693, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1497 }, { "clip_ratio/high_max": 0.002943505740404362, "clip_ratio/high_mean": 0.0010138052475667791, "clip_ratio/low_mean": 0.0007721014699200168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017859067229437642, "epoch": 3.499270924467775, "grad_norm": 0.3522396981716156, "learning_rate": 1e-06, "loss": -0.0159, "step": 1498 }, { "clip_ratio/high_max": 0.003434028069023043, "clip_ratio/high_mean": 0.0012605781776073854, "clip_ratio/low_mean": 0.001090516327167279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002351094444748014, "epoch": 3.5016039661708955, "grad_norm": 0.13213761150836945, "learning_rate": 1e-06, "loss": -0.0167, "step": 1499 }, { "clip_ratio/high_max": 0.004153806337853894, "clip_ratio/high_mean": 0.0015229104828904383, "clip_ratio/low_mean": 0.001439975640096236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029628860793309286, "epoch": 3.5039370078740157, "grad_norm": 0.10513986647129059, "learning_rate": 1e-06, "loss": -0.0168, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 930.654052734375, "completions/mean_terminated_length": 645.6958618164062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 3.506270049577136, "grad_norm": 0.3756645917892456, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 217873993.0, "reward": 0.6049107313156128, "reward_std": 0.18708978593349457, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1501 }, { "clip_ratio/high_max": 0.003169316660205368, "clip_ratio/high_mean": 0.0013845680368831381, "clip_ratio/low_mean": 0.0008977585512184305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022823266190243885, "epoch": 3.5086030912802566, "grad_norm": 0.20947962999343872, "learning_rate": 1e-06, "loss": -0.0177, "step": 1502 }, { "clip_ratio/high_max": 0.004149363077885937, "clip_ratio/high_mean": 0.0017363521292281803, "clip_ratio/low_mean": 0.0013428201091301162, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030791722892899998, "epoch": 3.5109361329833773, "grad_norm": 0.1465536504983902, "learning_rate": 1e-06, "loss": -0.0181, "step": 1503 }, { "clip_ratio/high_max": 0.004881751345237717, "clip_ratio/high_mean": 0.002035270335909445, "clip_ratio/low_mean": 0.0017654242365097161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038006945833330974, "epoch": 3.5132691746864975, "grad_norm": 0.11865488439798355, "learning_rate": 1e-06, "loss": -0.0183, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 1010.7991333007812, "completions/mean_terminated_length": 623.2110595703125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.515602216389618, "grad_norm": 0.2934607267379761, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 218466365.0, "reward": 0.5625, "reward_std": 0.14748379588127136, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1505 }, { "clip_ratio/high_max": 0.003197393612936139, "clip_ratio/high_mean": 0.001095556815926102, "clip_ratio/low_mean": 0.0006936114441487007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017891682364279404, "epoch": 3.5179352580927383, "grad_norm": 0.19158892333507538, "learning_rate": 1e-06, "loss": -0.0463, "step": 1506 }, { "clip_ratio/high_max": 0.003879119838529732, "clip_ratio/high_mean": 0.0014059762434044387, "clip_ratio/low_mean": 0.0010591159480100032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024650922132423148, "epoch": 3.520268299795859, "grad_norm": 0.13163326680660248, "learning_rate": 1e-06, "loss": -0.0466, "step": 1507 }, { "clip_ratio/high_max": 0.004882282490143552, "clip_ratio/high_mean": 0.001725567593894084, "clip_ratio/low_mean": 0.0014124980516498908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031380656582769006, "epoch": 3.522601341498979, "grad_norm": 0.10336554050445557, "learning_rate": 1e-06, "loss": -0.0468, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 1063.09375, "completions/mean_terminated_length": 612.046142578125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.5249343832021, "grad_norm": 0.3643072247505188, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 219038617.0, "reward": 0.5323660969734192, "reward_std": 0.16390429437160492, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1509 }, { "clip_ratio/high_max": 0.0033953819583985023, "clip_ratio/high_mean": 0.001186316574603552, "clip_ratio/low_mean": 0.0009587650183675578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002145081580238184, "epoch": 3.52726742490522, "grad_norm": 0.43013814091682434, "learning_rate": 1e-06, "loss": -0.0623, "step": 1510 }, { "clip_ratio/high_max": 0.003931037004804239, "clip_ratio/high_mean": 0.0014922929913154803, "clip_ratio/low_mean": 0.0013898893594159745, "clip_ratio/low_min": 1.537893695058301e-05, "clip_ratio/region_mean": 0.002882182292523794, "epoch": 3.5296004666083407, "grad_norm": 0.18274205923080444, "learning_rate": 1e-06, "loss": -0.0625, "step": 1511 }, { "clip_ratio/high_max": 0.004750993233756162, "clip_ratio/high_mean": 0.001766452711308375, "clip_ratio/low_mean": 0.001852617282565916, "clip_ratio/low_min": 3.075787390116602e-05, "clip_ratio/region_mean": 0.003619070033892058, "epoch": 3.531933508311461, "grad_norm": 0.30144381523132324, "learning_rate": 1e-06, "loss": -0.0627, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3084.0, "completions/mean_length": 947.9397583007812, "completions/mean_terminated_length": 600.7559204101562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 3.5342665500145816, "grad_norm": 0.36959466338157654, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 219608067.0, "reward": 0.6127232313156128, "reward_std": 0.17908385396003723, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1513 }, { "clip_ratio/high_max": 0.0032637810800224543, "clip_ratio/high_mean": 0.0014560824020009022, "clip_ratio/low_mean": 0.0009883404673018958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024444228183710948, "epoch": 3.536599591717702, "grad_norm": 0.2260734736919403, "learning_rate": 1e-06, "loss": -0.0316, "step": 1514 }, { "clip_ratio/high_max": 0.003992053141701035, "clip_ratio/high_mean": 0.0017875585981528275, "clip_ratio/low_mean": 0.0014606770346290432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032482355745742097, "epoch": 3.5389326334208224, "grad_norm": 0.14931000769138336, "learning_rate": 1e-06, "loss": -0.0319, "step": 1515 }, { "clip_ratio/high_max": 0.004673718343838118, "clip_ratio/high_mean": 0.0021179446157475468, "clip_ratio/low_mean": 0.0017957969939743634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003913741587894037, "epoch": 3.5412656751239426, "grad_norm": 0.12227077782154083, "learning_rate": 1e-06, "loss": -0.0321, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 1028.78125, "completions/mean_terminated_length": 595.0726318359375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.5435987168270633, "grad_norm": 0.35472121834754944, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 220175279.0, "reward": 0.5814732313156128, "reward_std": 0.1612711399793625, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1517 }, { "clip_ratio/high_max": 0.004369788264739327, "clip_ratio/high_mean": 0.0016241427510976791, "clip_ratio/low_mean": 0.000777338719672116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024014815062400885, "epoch": 3.545931758530184, "grad_norm": 0.4712355434894562, "learning_rate": 1e-06, "loss": -0.0399, "step": 1518 }, { "clip_ratio/high_max": 0.0052785316365770996, "clip_ratio/high_mean": 0.001990587425098056, "clip_ratio/low_mean": 0.0012342627323960187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032248501665890217, "epoch": 3.548264800233304, "grad_norm": 0.28679054975509644, "learning_rate": 1e-06, "loss": -0.0402, "step": 1519 }, { "clip_ratio/high_max": 0.005605215163086541, "clip_ratio/high_mean": 0.002306110007339157, "clip_ratio/low_mean": 0.0016084281323855976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003914538130629808, "epoch": 3.5505978419364244, "grad_norm": 0.24441812932491302, "learning_rate": 1e-06, "loss": -0.0404, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 940.0313110351562, "completions/mean_terminated_length": 591.9752197265625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.552930883639545, "grad_norm": 0.3488253951072693, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 220736235.0, "reward": 0.6350446939468384, "reward_std": 0.19170933961868286, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 1521 }, { "clip_ratio/high_max": 0.0039584932528669015, "clip_ratio/high_mean": 0.001462564574467251, "clip_ratio/low_mean": 0.0009253976131731179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00238796224584803, "epoch": 3.5552639253426657, "grad_norm": 0.24032549560070038, "learning_rate": 1e-06, "loss": -0.0588, "step": 1522 }, { "clip_ratio/high_max": 0.004810271624592133, "clip_ratio/high_mean": 0.0019088198969257064, "clip_ratio/low_mean": 0.0014275586690928321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003336378649692051, "epoch": 3.557596967045786, "grad_norm": 0.14824149012565613, "learning_rate": 1e-06, "loss": -0.0592, "step": 1523 }, { "clip_ratio/high_max": 0.00611777811718639, "clip_ratio/high_mean": 0.0023005844486760907, "clip_ratio/low_mean": 0.0019258749016444199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004226459393976256, "epoch": 3.5599300087489065, "grad_norm": 0.1202918291091919, "learning_rate": 1e-06, "loss": -0.0594, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 894.1842041015625, "completions/mean_terminated_length": 580.2806396484375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 3.5622630504520267, "grad_norm": 0.3262138068675995, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 221300576.0, "reward": 0.6149553656578064, "reward_std": 0.15992455184459686, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1525 }, { "clip_ratio/high_max": 0.003378881163371261, "clip_ratio/high_mean": 0.001296427843044512, "clip_ratio/low_mean": 0.0013072228339296998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026036506533273496, "epoch": 3.5645960921551474, "grad_norm": 0.251617968082428, "learning_rate": 1e-06, "loss": -0.0088, "step": 1526 }, { "clip_ratio/high_max": 0.004017836079583503, "clip_ratio/high_mean": 0.0016698903782526031, "clip_ratio/low_mean": 0.0019555173621483846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036254077567718923, "epoch": 3.5669291338582676, "grad_norm": 0.15415586531162262, "learning_rate": 1e-06, "loss": -0.0092, "step": 1527 }, { "clip_ratio/high_max": 0.0048395818448625505, "clip_ratio/high_mean": 0.002033793367445469, "clip_ratio/low_mean": 0.002602042441139929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004635835881344974, "epoch": 3.5692621755613883, "grad_norm": 0.12800532579421997, "learning_rate": 1e-06, "loss": -0.0094, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3864.0, "completions/mean_length": 931.7723388671875, "completions/mean_terminated_length": 595.8173217773438, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 3.5715952172645085, "grad_norm": 0.33652952313423157, "learning_rate": 1e-06, "loss": -0.0703, "num_tokens": 221879196.0, "reward": 0.5915178656578064, "reward_std": 0.1631825715303421, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1529 }, { "clip_ratio/high_max": 0.0032083376063383184, "clip_ratio/high_mean": 0.001352563131149509, "clip_ratio/low_mean": 0.0009093239386857022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022618870352744125, "epoch": 3.573928258967629, "grad_norm": 0.3137912452220917, "learning_rate": 1e-06, "loss": -0.0703, "step": 1530 }, { "clip_ratio/high_max": 0.004593252437189221, "clip_ratio/high_mean": 0.0018878825576393865, "clip_ratio/low_mean": 0.001388741351547651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003276623880083207, "epoch": 3.5762613006707493, "grad_norm": 0.147227481007576, "learning_rate": 1e-06, "loss": -0.0707, "step": 1531 }, { "clip_ratio/high_max": 0.0054799829522380605, "clip_ratio/high_mean": 0.0021782278890896123, "clip_ratio/low_mean": 0.0018489591966499574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004027187111205421, "epoch": 3.57859434237387, "grad_norm": 0.11838389188051224, "learning_rate": 1e-06, "loss": -0.0709, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 957.3839721679688, "completions/mean_terminated_length": 585.1385498046875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.5809273840769906, "grad_norm": 0.35629597306251526, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 222441412.0, "reward": 0.6149553656578064, "reward_std": 0.16660889983177185, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1533 }, { "clip_ratio/high_max": 0.00434045483416412, "clip_ratio/high_mean": 0.0015466984768863767, "clip_ratio/low_mean": 0.0009631901148168254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002509888588974718, "epoch": 3.583260425780111, "grad_norm": 1.1193641424179077, "learning_rate": 1e-06, "loss": -0.032, "step": 1534 }, { "clip_ratio/high_max": 0.0048800574586493894, "clip_ratio/high_mean": 0.0019512368526193313, "clip_ratio/low_mean": 0.0016197883087443188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003571025197743438, "epoch": 3.585593467483231, "grad_norm": 0.8925911784172058, "learning_rate": 1e-06, "loss": -0.0322, "step": 1535 }, { "clip_ratio/high_max": 0.005907108454266563, "clip_ratio/high_mean": 0.002316489291843027, "clip_ratio/low_mean": 0.002097315500577679, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0044138048106106, "epoch": 3.5879265091863517, "grad_norm": 0.6900689005851746, "learning_rate": 1e-06, "loss": -0.0326, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 962.7042846679688, "completions/mean_terminated_length": 595.4601440429688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.5902595508894724, "grad_norm": 0.2817955017089844, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 223012243.0, "reward": 0.5814732313156128, "reward_std": 0.13128940761089325, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1537 }, { "clip_ratio/high_max": 0.002920760445704218, "clip_ratio/high_mean": 0.0009965495009964798, "clip_ratio/low_mean": 0.0007855768872104818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017821263754740357, "epoch": 3.5925925925925926, "grad_norm": 0.21005071699619293, "learning_rate": 1e-06, "loss": -0.0386, "step": 1538 }, { "clip_ratio/high_max": 0.0037049198799650185, "clip_ratio/high_mean": 0.0013449063226289582, "clip_ratio/low_mean": 0.0010674645127437543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002412370879028458, "epoch": 3.5949256342957128, "grad_norm": 0.14682909846305847, "learning_rate": 1e-06, "loss": -0.0388, "step": 1539 }, { "clip_ratio/high_max": 0.004441956218215637, "clip_ratio/high_mean": 0.0015301477687899023, "clip_ratio/low_mean": 0.001464590175601188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002994737929839175, "epoch": 3.5972586759988334, "grad_norm": 0.11666007339954376, "learning_rate": 1e-06, "loss": -0.039, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 1056.188720703125, "completions/mean_terminated_length": 617.4929809570312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 3.599591717701954, "grad_norm": 0.31285643577575684, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 223579028.0, "reward": 0.5323660969734192, "reward_std": 0.15605242550373077, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1541 }, { "clip_ratio/high_max": 0.0030681257412652485, "clip_ratio/high_mean": 0.0011774130380217684, "clip_ratio/low_mean": 0.0009085413330467418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020859543365077116, "epoch": 3.6019247594050743, "grad_norm": 0.25728097558021545, "learning_rate": 1e-06, "loss": -0.0254, "step": 1542 }, { "clip_ratio/high_max": 0.003666820870421361, "clip_ratio/high_mean": 0.0014283433156379033, "clip_ratio/low_mean": 0.0012985116263735108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027268549019936472, "epoch": 3.604257801108195, "grad_norm": 0.14587701857089996, "learning_rate": 1e-06, "loss": -0.0257, "step": 1543 }, { "clip_ratio/high_max": 0.004427370295161381, "clip_ratio/high_mean": 0.001744620607496472, "clip_ratio/low_mean": 0.0017054049276339356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003450025600614026, "epoch": 3.606590842811315, "grad_norm": 0.12500238418579102, "learning_rate": 1e-06, "loss": -0.0258, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 1207.47998046875, "completions/mean_terminated_length": 612.6702270507812, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.608923884514436, "grad_norm": 0.3241111934185028, "learning_rate": 1e-06, "loss": -0.0937, "num_tokens": 224124626.0, "reward": 0.6149553656578064, "reward_std": 0.17682655155658722, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1545 }, { "clip_ratio/high_max": 0.0030379883173736744, "clip_ratio/high_mean": 0.0012363471068965737, "clip_ratio/low_mean": 0.0008106009154289495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020469480004976504, "epoch": 3.611256926217556, "grad_norm": 0.20092715322971344, "learning_rate": 1e-06, "loss": -0.0938, "step": 1546 }, { "clip_ratio/high_max": 0.004211751234834082, "clip_ratio/high_mean": 0.0016279311494145077, "clip_ratio/low_mean": 0.001198958367240266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002826889540301636, "epoch": 3.6135899679206767, "grad_norm": 0.14301931858062744, "learning_rate": 1e-06, "loss": -0.0942, "step": 1547 }, { "clip_ratio/high_max": 0.00489734421717003, "clip_ratio/high_mean": 0.0020070656610187143, "clip_ratio/low_mean": 0.0015230460558086634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003530111745931208, "epoch": 3.615923009623797, "grad_norm": 0.11428657174110413, "learning_rate": 1e-06, "loss": -0.0943, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 1120.796875, "completions/mean_terminated_length": 611.3176879882812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.6182560513269175, "grad_norm": 0.3615557849407196, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 224681620.0, "reward": 0.5691964626312256, "reward_std": 0.1830640286207199, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 1549 }, { "clip_ratio/high_max": 0.0033484800078440458, "clip_ratio/high_mean": 0.0013866849512851331, "clip_ratio/low_mean": 0.0008986568354885094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022853417831356637, "epoch": 3.6205890930300377, "grad_norm": 0.2300233542919159, "learning_rate": 1e-06, "loss": -0.0493, "step": 1550 }, { "clip_ratio/high_max": 0.00420714513893472, "clip_ratio/high_mean": 0.0018317250287509523, "clip_ratio/low_mean": 0.0013650646269525168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031967896429705434, "epoch": 3.6229221347331584, "grad_norm": 0.1649959534406662, "learning_rate": 1e-06, "loss": -0.0496, "step": 1551 }, { "clip_ratio/high_max": 0.0047625846054870635, "clip_ratio/high_mean": 0.002119449920428451, "clip_ratio/low_mean": 0.0018051645456580445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003924614502466284, "epoch": 3.625255176436279, "grad_norm": 0.13146750628948212, "learning_rate": 1e-06, "loss": -0.0499, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2922.0, "completions/mean_length": 896.200927734375, "completions/mean_terminated_length": 552.0939331054688, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 3.6275882181393992, "grad_norm": 0.3896058201789856, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 225217584.0, "reward": 0.65625, "reward_std": 0.1693858504295349, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1553 }, { "clip_ratio/high_max": 0.004351139192294795, "clip_ratio/high_mean": 0.0014802272744418588, "clip_ratio/low_mean": 0.0014126373862382025, "clip_ratio/low_min": 2.8140477297711186e-05, "clip_ratio/region_mean": 0.00289286466431804, "epoch": 3.6299212598425195, "grad_norm": 0.32620298862457275, "learning_rate": 1e-06, "loss": -0.0666, "step": 1554 }, { "clip_ratio/high_max": 0.005212198608205654, "clip_ratio/high_mean": 0.00191652216744842, "clip_ratio/low_mean": 0.001893272561574122, "clip_ratio/low_min": 7.26913203834556e-05, "clip_ratio/region_mean": 0.0038097946817288175, "epoch": 3.63225430154564, "grad_norm": 0.22166509926319122, "learning_rate": 1e-06, "loss": -0.0669, "step": 1555 }, { "clip_ratio/high_max": 0.0058464856701903045, "clip_ratio/high_mean": 0.0022405720555980224, "clip_ratio/low_mean": 0.002330504652491072, "clip_ratio/low_min": 7.26913203834556e-05, "clip_ratio/region_mean": 0.004571076715365052, "epoch": 3.6345873432487608, "grad_norm": 0.14236122369766235, "learning_rate": 1e-06, "loss": -0.0671, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 1040.474365234375, "completions/mean_terminated_length": 639.2437133789062, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.636920384951881, "grad_norm": 0.36941227316856384, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 225804097.0, "reward": 0.5100446939468384, "reward_std": 0.17089177668094635, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1557 }, { "clip_ratio/high_max": 0.00320712444226956, "clip_ratio/high_mean": 0.0012146544813731452, "clip_ratio/low_mean": 0.0009235197812813567, "clip_ratio/low_min": 5.375188266043551e-05, "clip_ratio/region_mean": 0.00213817429903429, "epoch": 3.6392534266550016, "grad_norm": 0.2143459916114807, "learning_rate": 1e-06, "loss": -0.0285, "step": 1558 }, { "clip_ratio/high_max": 0.004129017572267912, "clip_ratio/high_mean": 0.0015934544135234319, "clip_ratio/low_mean": 0.0013927619402238633, "clip_ratio/low_min": 5.609405707218684e-05, "clip_ratio/region_mean": 0.002986216321005486, "epoch": 3.641586468358122, "grad_norm": 0.15883515775203705, "learning_rate": 1e-06, "loss": -0.0289, "step": 1559 }, { "clip_ratio/high_max": 0.0049488695949548855, "clip_ratio/high_mean": 0.0019195172608306166, "clip_ratio/low_mean": 0.001880105057352921, "clip_ratio/low_min": 8.9750494225882e-05, "clip_ratio/region_mean": 0.0037996223545633256, "epoch": 3.6439195100612425, "grad_norm": 0.11862213909626007, "learning_rate": 1e-06, "loss": -0.0291, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 1044.860595703125, "completions/mean_terminated_length": 608.9833984375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 3.6462525517643627, "grad_norm": 0.3595889210700989, "learning_rate": 1e-06, "loss": -0.0627, "num_tokens": 226368084.0, "reward": 0.5970982313156128, "reward_std": 0.2154543101787567, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 1561 }, { "clip_ratio/high_max": 0.004204580109217204, "clip_ratio/high_mean": 0.0016843500779941678, "clip_ratio/low_mean": 0.0010968255064653931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00278117555717472, "epoch": 3.6485855934674833, "grad_norm": 0.2778065800666809, "learning_rate": 1e-06, "loss": -0.0628, "step": 1562 }, { "clip_ratio/high_max": 0.004785167475347407, "clip_ratio/high_mean": 0.0021604470966849476, "clip_ratio/low_mean": 0.0016636165892123245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003824063707725145, "epoch": 3.6509186351706036, "grad_norm": 0.18634282052516937, "learning_rate": 1e-06, "loss": -0.0632, "step": 1563 }, { "clip_ratio/high_max": 0.006393658783053979, "clip_ratio/high_mean": 0.0026821411374839954, "clip_ratio/low_mean": 0.0021449356972880196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00482707675837446, "epoch": 3.653251676873724, "grad_norm": 0.1499367654323578, "learning_rate": 1e-06, "loss": -0.0634, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 988.1261596679688, "completions/mean_terminated_length": 606.4573974609375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.6555847185768444, "grad_norm": 0.3651546537876129, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 226951021.0, "reward": 0.5602678656578064, "reward_std": 0.1414324790239334, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 1565 }, { "clip_ratio/high_max": 0.004662491432100069, "clip_ratio/high_mean": 0.001526115176602616, "clip_ratio/low_mean": 0.000855527435305703, "clip_ratio/low_min": 1.0199086318607442e-05, "clip_ratio/region_mean": 0.0023816426109988242, "epoch": 3.657917760279965, "grad_norm": 1.3459382057189941, "learning_rate": 1e-06, "loss": -0.0461, "step": 1566 }, { "clip_ratio/high_max": 0.005645391007419676, "clip_ratio/high_mean": 0.0018409408039588016, "clip_ratio/low_mean": 0.0012496254603320267, "clip_ratio/low_min": 1.6905598386074416e-05, "clip_ratio/region_mean": 0.00309056622791104, "epoch": 3.6602508019830857, "grad_norm": 0.18026192486286163, "learning_rate": 1e-06, "loss": -0.0465, "step": 1567 }, { "clip_ratio/high_max": 0.006300625122094061, "clip_ratio/high_mean": 0.002103770078974776, "clip_ratio/low_mean": 0.0016038492449297337, "clip_ratio/low_min": 3.0720078939339146e-05, "clip_ratio/region_mean": 0.003707619325723499, "epoch": 3.662583843686206, "grad_norm": 0.13710922002792358, "learning_rate": 1e-06, "loss": -0.0467, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 942.841552734375, "completions/mean_terminated_length": 582.0323486328125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.664916885389326, "grad_norm": 0.32218852639198303, "learning_rate": 1e-06, "loss": -0.0564, "num_tokens": 227514159.0, "reward": 0.6674107313156128, "reward_std": 0.1585274189710617, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1569 }, { "clip_ratio/high_max": 0.0033045380259864032, "clip_ratio/high_mean": 0.0013742682276642881, "clip_ratio/low_mean": 0.0007751147072667663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021493829335668124, "epoch": 3.667249927092447, "grad_norm": 0.2527618408203125, "learning_rate": 1e-06, "loss": -0.0564, "step": 1570 }, { "clip_ratio/high_max": 0.004432777663168963, "clip_ratio/high_mean": 0.0017319885628239717, "clip_ratio/low_mean": 0.0012123033793614013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002944291918538511, "epoch": 3.6695829687955674, "grad_norm": 0.14069336652755737, "learning_rate": 1e-06, "loss": -0.0567, "step": 1571 }, { "clip_ratio/high_max": 0.005416095613327343, "clip_ratio/high_mean": 0.0021105323612573557, "clip_ratio/low_mean": 0.0016114068694150774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003721939297975041, "epoch": 3.6719160104986877, "grad_norm": 0.11302340030670166, "learning_rate": 1e-06, "loss": -0.0569, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 1061.1685791015625, "completions/mean_terminated_length": 653.9633178710938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.674249052201808, "grad_norm": 0.3315960764884949, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 228123318.0, "reward": 0.5569196939468384, "reward_std": 0.1628134846687317, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1573 }, { "clip_ratio/high_max": 0.002792710169160273, "clip_ratio/high_mean": 0.001133435551309958, "clip_ratio/low_mean": 0.0011106792717328062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022441149048972875, "epoch": 3.6765820939049285, "grad_norm": 0.2727128565311432, "learning_rate": 1e-06, "loss": -0.0386, "step": 1574 }, { "clip_ratio/high_max": 0.0038406569801736623, "clip_ratio/high_mean": 0.0014710588402522262, "clip_ratio/low_mean": 0.0016742073530622292, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031452661351067945, "epoch": 3.678915135608049, "grad_norm": 0.32342395186424255, "learning_rate": 1e-06, "loss": -0.039, "step": 1575 }, { "clip_ratio/high_max": 0.004592683064402081, "clip_ratio/high_mean": 0.0017668247201072518, "clip_ratio/low_mean": 0.002124619612004608, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038914442993700504, "epoch": 3.6812481773111694, "grad_norm": 1.2609137296676636, "learning_rate": 1e-06, "loss": -0.0391, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3402.0, "completions/mean_length": 1038.4263916015625, "completions/mean_terminated_length": 619.3679809570312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.68358121901429, "grad_norm": 0.3630722165107727, "learning_rate": 1e-06, "loss": -0.0437, "num_tokens": 228695604.0, "reward": 0.5301339626312256, "reward_std": 0.1681077629327774, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936988949775696, "step": 1577 }, { "clip_ratio/high_max": 0.003347970894537866, "clip_ratio/high_mean": 0.0011681997802952537, "clip_ratio/low_mean": 0.0010536735026107635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002221873299276922, "epoch": 3.6859142607174102, "grad_norm": 0.22458170354366302, "learning_rate": 1e-06, "loss": -0.0437, "step": 1578 }, { "clip_ratio/high_max": 0.004416114941705018, "clip_ratio/high_mean": 0.0016081201520137256, "clip_ratio/low_mean": 0.0015054010291351005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003113521175691858, "epoch": 3.688247302420531, "grad_norm": 0.1649567186832428, "learning_rate": 1e-06, "loss": -0.0441, "step": 1579 }, { "clip_ratio/high_max": 0.005585735125350766, "clip_ratio/high_mean": 0.001959314031410031, "clip_ratio/low_mean": 0.001907202025904553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00386651603912469, "epoch": 3.690580344123651, "grad_norm": 0.13269121944904327, "learning_rate": 1e-06, "loss": -0.0443, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 1024.3460693359375, "completions/mean_terminated_length": 581.054931640625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 3.6929133858267718, "grad_norm": 0.42029282450675964, "learning_rate": 1e-06, "loss": -0.0766, "num_tokens": 229239514.0, "reward": 0.6194196939468384, "reward_std": 0.17078298330307007, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1581 }, { "clip_ratio/high_max": 0.004905180307105184, "clip_ratio/high_mean": 0.0016320274844474625, "clip_ratio/low_mean": 0.00081249791674054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024445253802696243, "epoch": 3.695246427529892, "grad_norm": 1.8582253456115723, "learning_rate": 1e-06, "loss": -0.0765, "step": 1582 }, { "clip_ratio/high_max": 0.005668967394740321, "clip_ratio/high_mean": 0.001977574393094983, "clip_ratio/low_mean": 0.0012094487447029678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031870231323409826, "epoch": 3.6975794692330126, "grad_norm": 0.3027811348438263, "learning_rate": 1e-06, "loss": -0.077, "step": 1583 }, { "clip_ratio/high_max": 0.006513013751828112, "clip_ratio/high_mean": 0.002340283121156972, "clip_ratio/low_mean": 0.0016290325438603759, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003969315686845221, "epoch": 3.699912510936133, "grad_norm": 0.16053646802902222, "learning_rate": 1e-06, "loss": -0.0772, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 1050.419677734375, "completions/mean_terminated_length": 574.9161376953125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 3.7022455526392535, "grad_norm": 0.35522741079330444, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 229771434.0, "reward": 0.6071428656578064, "reward_std": 0.16517744958400726, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1585 }, { "clip_ratio/high_max": 0.0036568081559380516, "clip_ratio/high_mean": 0.0013587209541583434, "clip_ratio/low_mean": 0.0010310748948541004, "clip_ratio/low_min": 5.363272430258803e-05, "clip_ratio/region_mean": 0.002389795823546592, "epoch": 3.704578594342374, "grad_norm": 0.2512953281402588, "learning_rate": 1e-06, "loss": -0.042, "step": 1586 }, { "clip_ratio/high_max": 0.0044697420235024765, "clip_ratio/high_mean": 0.0016965594695648178, "clip_ratio/low_mean": 0.0015337004206230631, "clip_ratio/low_min": 8.840169903123751e-05, "clip_ratio/region_mean": 0.0032302598556270823, "epoch": 3.7069116360454943, "grad_norm": 0.1620185822248459, "learning_rate": 1e-06, "loss": -0.0424, "step": 1587 }, { "clip_ratio/high_max": 0.005599283394985832, "clip_ratio/high_mean": 0.0020964101604477037, "clip_ratio/low_mean": 0.0020502036859397776, "clip_ratio/low_min": 8.840169903123751e-05, "clip_ratio/region_mean": 0.004146613835473545, "epoch": 3.7092446777486145, "grad_norm": 0.12447145581245422, "learning_rate": 1e-06, "loss": -0.0426, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 1069.614990234375, "completions/mean_terminated_length": 619.5371704101562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.711577719451735, "grad_norm": 0.3626271188259125, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 230345289.0, "reward": 0.578125, "reward_std": 0.17077341675758362, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1589 }, { "clip_ratio/high_max": 0.003062231218791567, "clip_ratio/high_mean": 0.0013405614809016697, "clip_ratio/low_mean": 0.0009490746688243235, "clip_ratio/low_min": 5.912961205467582e-05, "clip_ratio/region_mean": 0.0022896361479070038, "epoch": 3.713910761154856, "grad_norm": 0.2277187556028366, "learning_rate": 1e-06, "loss": -0.0326, "step": 1590 }, { "clip_ratio/high_max": 0.003829478155239485, "clip_ratio/high_mean": 0.0016572985332459211, "clip_ratio/low_mean": 0.0014761280435777735, "clip_ratio/low_min": 7.438704778905958e-05, "clip_ratio/region_mean": 0.003133426565909758, "epoch": 3.716243802857976, "grad_norm": 0.16102197766304016, "learning_rate": 1e-06, "loss": -0.0329, "step": 1591 }, { "clip_ratio/high_max": 0.004660401435103267, "clip_ratio/high_mean": 0.0020575818271026947, "clip_ratio/low_mean": 0.0018840373304556124, "clip_ratio/low_min": 0.0001041418727254495, "clip_ratio/region_mean": 0.003941619252145756, "epoch": 3.7185768445610963, "grad_norm": 0.1247912049293518, "learning_rate": 1e-06, "loss": -0.0331, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 805.458740234375, "completions/mean_terminated_length": 539.5162963867188, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.720909886264217, "grad_norm": 0.3289114236831665, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 230872740.0, "reward": 0.6640625, "reward_std": 0.1298893392086029, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1593 }, { "clip_ratio/high_max": 0.0030552562093362212, "clip_ratio/high_mean": 0.0011155529373354511, "clip_ratio/low_mean": 0.0007572791282655089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018728320756054018, "epoch": 3.7232429279673376, "grad_norm": 0.20431537926197052, "learning_rate": 1e-06, "loss": -0.0363, "step": 1594 }, { "clip_ratio/high_max": 0.0038031902950024232, "clip_ratio/high_mean": 0.001414468941220548, "clip_ratio/low_mean": 0.0011381189779058332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025525879209453706, "epoch": 3.725575969670458, "grad_norm": 0.14851944148540497, "learning_rate": 1e-06, "loss": -0.0366, "step": 1595 }, { "clip_ratio/high_max": 0.004644519176508766, "clip_ratio/high_mean": 0.0017066515847545816, "clip_ratio/low_mean": 0.001532902828330407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032395543457823806, "epoch": 3.7279090113735784, "grad_norm": 0.11473555862903595, "learning_rate": 1e-06, "loss": -0.0368, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 1059.743408203125, "completions/mean_terminated_length": 608.1974487304688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 3.7302420530766986, "grad_norm": 0.3790885806083679, "learning_rate": 1e-06, "loss": -0.0485, "num_tokens": 231427118.0, "reward": 0.5680803656578064, "reward_std": 0.217105433344841, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1597 }, { "clip_ratio/high_max": 0.003211742441635579, "clip_ratio/high_mean": 0.0014720362378284335, "clip_ratio/low_mean": 0.0011778200478147482, "clip_ratio/low_min": 1.8939394067274407e-05, "clip_ratio/region_mean": 0.002649856309290044, "epoch": 3.7325750947798193, "grad_norm": 0.26180848479270935, "learning_rate": 1e-06, "loss": -0.0486, "step": 1598 }, { "clip_ratio/high_max": 0.004077058256370947, "clip_ratio/high_mean": 0.0018267248888150789, "clip_ratio/low_mean": 0.001726854381558951, "clip_ratio/low_min": 3.787878813454881e-05, "clip_ratio/region_mean": 0.0035535792994778603, "epoch": 3.7349081364829395, "grad_norm": 0.174207404255867, "learning_rate": 1e-06, "loss": -0.049, "step": 1599 }, { "clip_ratio/high_max": 0.004669600384659134, "clip_ratio/high_mean": 0.002200999435444828, "clip_ratio/low_mean": 0.0023176538343250286, "clip_ratio/low_min": 5.758852057624608e-05, "clip_ratio/region_mean": 0.0045186533679952845, "epoch": 3.73724117818606, "grad_norm": 0.14960600435733795, "learning_rate": 1e-06, "loss": -0.0492, "step": 1600 }, { "epoch": 3.73724117818606, "step": 1600, "total_flos": 0.0, "train_loss": 0.04791172147407224, "train_runtime": 51217.5177, "train_samples_per_second": 27.99, "train_steps_per_second": 0.031 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 231427118, "num_train_epochs": 4, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }