{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.970262390670554, "eval_steps": 500, "global_step": 1560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013113839285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3635.0, "completions/mean_length": 581.2455444335938, "completions/mean_terminated_length": 534.5411376953125, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.009329446064139942, "grad_norm": 0.17493604123592377, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 2242408.0, "reward": 0.5080915689468384, "reward_std": 0.25282198190689087, "rewards/simpleverify_reward/mean": 0.5080915093421936, "rewards/simpleverify_reward/std": 0.5000042915344238, "step": 1 }, { "clip_ratio/high_max": 0.0024519292055629194, "clip_ratio/high_mean": 0.0010059480082418304, "clip_ratio/low_mean": 0.0006111991915531689, "clip_ratio/low_min": 5.6025533922365867e-05, "clip_ratio/region_mean": 0.0016171471943380311, "epoch": 0.018658892128279883, "grad_norm": 0.14644181728363037, "learning_rate": 1e-06, "loss": -0.0131, "step": 2 }, { "clip_ratio/high_max": 0.0028774274105671793, "clip_ratio/high_mean": 0.001170621133496752, "clip_ratio/low_mean": 0.0008192473942472134, "clip_ratio/low_min": 6.209780622157268e-05, "clip_ratio/region_mean": 0.0019898685204680078, "epoch": 0.027988338192419825, "grad_norm": 0.13708409667015076, "learning_rate": 1e-06, "loss": -0.0025, "step": 3 }, { "clip_ratio/high_max": 0.0024275040632346645, "clip_ratio/high_mean": 0.0010067483271996025, "clip_ratio/low_mean": 0.0008512503063684562, "clip_ratio/low_min": 0.00011706756231433246, "clip_ratio/region_mean": 0.0018579986135591753, "epoch": 0.037317784256559766, "grad_norm": 0.13022777438163757, "learning_rate": 1e-06, "loss": 0.0086, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011997767857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3160.0, "completions/mean_length": 588.6328125, "completions/mean_terminated_length": 546.041259765625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.04664723032069971, "grad_norm": 0.14320141077041626, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 4522780.0, "reward": 0.5111607313156128, "reward_std": 0.2627614140510559, "rewards/simpleverify_reward/mean": 0.5111607313156128, "rewards/simpleverify_reward/std": 0.49994516372680664, "step": 5 }, { "clip_ratio/high_max": 0.003039437418919988, "clip_ratio/high_mean": 0.0012297874673095066, "clip_ratio/low_mean": 0.0010019387045758776, "clip_ratio/low_min": 0.00015669495041947812, "clip_ratio/region_mean": 0.0022317261900752783, "epoch": 0.05597667638483965, "grad_norm": 0.13477666676044464, "learning_rate": 1e-06, "loss": 0.0242, "step": 6 }, { "clip_ratio/high_max": 0.003102322058111895, "clip_ratio/high_mean": 0.001369738583889557, "clip_ratio/low_mean": 0.001000345240754541, "clip_ratio/low_min": 8.813426211418118e-05, "clip_ratio/region_mean": 0.002370083784626331, "epoch": 0.0653061224489796, "grad_norm": 0.14433100819587708, "learning_rate": 1e-06, "loss": -0.0336, "step": 7 }, { "clip_ratio/high_max": 0.0028942005810677074, "clip_ratio/high_mean": 0.0013536905025830492, "clip_ratio/low_mean": 0.0011913909402210265, "clip_ratio/low_min": 8.153841463354183e-05, "clip_ratio/region_mean": 0.0025450814137002453, "epoch": 0.07463556851311953, "grad_norm": 0.14411970973014832, "learning_rate": 1e-06, "loss": 0.0084, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 627.8309326171875, "completions/mean_terminated_length": 560.7559814453125, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.08396501457725948, "grad_norm": 0.14034909009933472, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 6865438.0, "reward": 0.5094866156578064, "reward_std": 0.2229158878326416, "rewards/simpleverify_reward/mean": 0.5094866156578064, "rewards/simpleverify_reward/std": 0.49997973442077637, "step": 9 }, { "clip_ratio/high_max": 0.0021710594664909877, "clip_ratio/high_mean": 0.0008687489644216839, "clip_ratio/low_mean": 0.0005598217485385248, "clip_ratio/low_min": 6.239254980755504e-05, "clip_ratio/region_mean": 0.001428570700227283, "epoch": 0.09329446064139942, "grad_norm": 0.1351631134748459, "learning_rate": 1e-06, "loss": 0.0144, "step": 10 }, { "clip_ratio/high_max": 0.0025452003901591524, "clip_ratio/high_mean": 0.0010179718556173611, "clip_ratio/low_mean": 0.0006625103706028312, "clip_ratio/low_min": 3.398441276658559e-05, "clip_ratio/region_mean": 0.001680482251686044, "epoch": 0.10262390670553936, "grad_norm": 0.11703171581029892, "learning_rate": 1e-06, "loss": 0.0047, "step": 11 }, { "clip_ratio/high_max": 0.002472730979206972, "clip_ratio/high_mean": 0.0010246134079352487, "clip_ratio/low_mean": 0.0009653315792093053, "clip_ratio/low_min": 0.00013718358331971103, "clip_ratio/region_mean": 0.001989944990782533, "epoch": 0.1119533527696793, "grad_norm": 0.12323521822690964, "learning_rate": 1e-06, "loss": 0.0049, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012555803571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3188.0, "completions/mean_length": 607.7193603515625, "completions/mean_terminated_length": 563.3641967773438, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.12128279883381925, "grad_norm": 0.13984547555446625, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 9228216.0, "reward": 0.5457589626312256, "reward_std": 0.22266876697540283, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.49797120690345764, "step": 13 }, { "clip_ratio/high_max": 0.001932133367517963, "clip_ratio/high_mean": 0.0008500031563016819, "clip_ratio/low_mean": 0.0005756239315815037, "clip_ratio/low_min": 5.104591400595382e-05, "clip_ratio/region_mean": 0.001425627080607228, "epoch": 0.1306122448979592, "grad_norm": 0.11634547263383865, "learning_rate": 1e-06, "loss": -0.0125, "step": 14 }, { "clip_ratio/high_max": 0.0022687641758238897, "clip_ratio/high_mean": 0.0009989524551201612, "clip_ratio/low_mean": 0.000720806123354123, "clip_ratio/low_min": 4.257699674781179e-05, "clip_ratio/region_mean": 0.0017197585693793371, "epoch": 0.13994169096209913, "grad_norm": 0.11972187459468842, "learning_rate": 1e-06, "loss": -0.0044, "step": 15 }, { "clip_ratio/high_max": 0.0022173000616021454, "clip_ratio/high_mean": 0.0009379026814713143, "clip_ratio/low_mean": 0.0007830274444131646, "clip_ratio/low_min": 2.5443959202675615e-05, "clip_ratio/region_mean": 0.0017209301076945849, "epoch": 0.14927113702623906, "grad_norm": 0.11197485774755478, "learning_rate": 1e-06, "loss": 0.0018, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 588.0248413085938, "completions/mean_terminated_length": 552.430908203125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.158600583090379, "grad_norm": 0.12907473742961884, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 11540721.0, "reward": 0.5831473469734192, "reward_std": 0.2338676154613495, "rewards/simpleverify_reward/mean": 0.5831473469734192, "rewards/simpleverify_reward/std": 0.4931068420410156, "step": 17 }, { "clip_ratio/high_max": 0.002336648576601874, "clip_ratio/high_mean": 0.0010057021790998988, "clip_ratio/low_mean": 0.0006432715908886166, "clip_ratio/low_min": 1.2515018170233816e-05, "clip_ratio/region_mean": 0.0016489737608935684, "epoch": 0.16793002915451896, "grad_norm": 0.13437563180923462, "learning_rate": 1e-06, "loss": 0.0215, "step": 18 }, { "clip_ratio/high_max": 0.0025223474949598312, "clip_ratio/high_mean": 0.001070035508746514, "clip_ratio/low_mean": 0.0007129741788958199, "clip_ratio/low_min": 5.4675053434039e-05, "clip_ratio/region_mean": 0.0017830096840043552, "epoch": 0.1772594752186589, "grad_norm": 0.1328997015953064, "learning_rate": 1e-06, "loss": 0.0173, "step": 19 }, { "clip_ratio/high_max": 0.0023523970012320206, "clip_ratio/high_mean": 0.001005390517093474, "clip_ratio/low_mean": 0.0007245687193062622, "clip_ratio/low_min": 7.212390391941881e-05, "clip_ratio/region_mean": 0.0017299592000199482, "epoch": 0.18658892128279883, "grad_norm": 0.13091354072093964, "learning_rate": 1e-06, "loss": 0.0186, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 619.5198364257812, "completions/mean_terminated_length": 578.2966918945312, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.19591836734693877, "grad_norm": 0.12361998111009598, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 13935304.0, "reward": 0.5546875, "reward_std": 0.21687793731689453, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4970696270465851, "step": 21 }, { "clip_ratio/high_max": 0.0023276917963812593, "clip_ratio/high_mean": 0.0010488078096386744, "clip_ratio/low_mean": 0.0005773739540018141, "clip_ratio/low_min": 1.2783800229954068e-05, "clip_ratio/region_mean": 0.0016261817982012872, "epoch": 0.20524781341107873, "grad_norm": 0.12800021469593048, "learning_rate": 1e-06, "loss": 0.0037, "step": 22 }, { "clip_ratio/high_max": 0.001725553494907217, "clip_ratio/high_mean": 0.0007855956555431476, "clip_ratio/low_mean": 0.0005828676894452656, "clip_ratio/low_min": 3.695330451591872e-05, "clip_ratio/region_mean": 0.0013684633559023496, "epoch": 0.21457725947521866, "grad_norm": 0.11946704983711243, "learning_rate": 1e-06, "loss": 0.0401, "step": 23 }, { "clip_ratio/high_max": 0.0020689135635620914, "clip_ratio/high_mean": 0.0008959819479059661, "clip_ratio/low_mean": 0.0006255708303797292, "clip_ratio/low_min": 5.919905015616678e-05, "clip_ratio/region_mean": 0.0015215527528198436, "epoch": 0.2239067055393586, "grad_norm": 0.11608587205410004, "learning_rate": 1e-06, "loss": 0.0048, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016462053571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 626.027099609375, "completions/mean_terminated_length": 567.9481201171875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.23323615160349853, "grad_norm": 0.1277482509613037, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 16288137.0, "reward": 0.5306919813156128, "reward_std": 0.20616421103477478, "rewards/simpleverify_reward/mean": 0.5306919813156128, "rewards/simpleverify_reward/std": 0.49912673234939575, "step": 25 }, { "clip_ratio/high_max": 0.002106813044520095, "clip_ratio/high_mean": 0.0008431876376562286, "clip_ratio/low_mean": 0.0006450238925026497, "clip_ratio/low_min": 4.131935020268429e-05, "clip_ratio/region_mean": 0.0014882115210639313, "epoch": 0.2425655976676385, "grad_norm": 0.12528544664382935, "learning_rate": 1e-06, "loss": 0.0129, "step": 26 }, { "clip_ratio/high_max": 0.001846392551669851, "clip_ratio/high_mean": 0.0007230933060782263, "clip_ratio/low_mean": 0.0007191729619080434, "clip_ratio/low_min": 2.2024402824172284e-05, "clip_ratio/region_mean": 0.001442266278900206, "epoch": 0.2518950437317784, "grad_norm": 0.1178368479013443, "learning_rate": 1e-06, "loss": 0.0192, "step": 27 }, { "clip_ratio/high_max": 0.0020629207574529573, "clip_ratio/high_mean": 0.0008437964825134259, "clip_ratio/low_mean": 0.0006216232682163536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014654197620984633, "epoch": 0.2612244897959184, "grad_norm": 0.11520645767450333, "learning_rate": 1e-06, "loss": 0.0038, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014229910714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3869.0, "completions/mean_length": 617.9291381835938, "completions/mean_terminated_length": 567.7220458984375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.2705539358600583, "grad_norm": 0.12818750739097595, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 18653931.0, "reward": 0.5412946939468384, "reward_std": 0.2073201835155487, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49836134910583496, "step": 29 }, { "clip_ratio/high_max": 0.0022696401429129764, "clip_ratio/high_mean": 0.0009347531195089687, "clip_ratio/low_mean": 0.0006198049768499914, "clip_ratio/low_min": 3.09339739033021e-05, "clip_ratio/region_mean": 0.0015545581045444123, "epoch": 0.27988338192419826, "grad_norm": 0.12092304229736328, "learning_rate": 1e-06, "loss": -0.006, "step": 30 }, { "clip_ratio/high_max": 0.0018945966148748994, "clip_ratio/high_mean": 0.0008741961919440655, "clip_ratio/low_mean": 0.0006421127036446705, "clip_ratio/low_min": 1.4817449482507072e-05, "clip_ratio/region_mean": 0.001516308922873577, "epoch": 0.2892128279883382, "grad_norm": 0.12309981882572174, "learning_rate": 1e-06, "loss": 0.0162, "step": 31 }, { "clip_ratio/high_max": 0.0018378813510935288, "clip_ratio/high_mean": 0.0008178476746252272, "clip_ratio/low_mean": 0.0007057220427668653, "clip_ratio/low_min": 7.686254639338586e-05, "clip_ratio/region_mean": 0.0015235697319440078, "epoch": 0.29854227405247813, "grad_norm": 0.12673714756965637, "learning_rate": 1e-06, "loss": -0.0, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015066964285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 611.962890625, "completions/mean_terminated_length": 558.666015625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30787172011661806, "grad_norm": 0.11993032693862915, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 20972430.0, "reward": 0.5505022406578064, "reward_std": 0.2194855660200119, "rewards/simpleverify_reward/mean": 0.5505022406578064, "rewards/simpleverify_reward/std": 0.4975124001502991, "step": 33 }, { "clip_ratio/high_max": 0.001994181227928493, "clip_ratio/high_mean": 0.0008830488004605286, "clip_ratio/low_mean": 0.0007373878397629596, "clip_ratio/low_min": 7.69297485021525e-05, "clip_ratio/region_mean": 0.0016204366402234882, "epoch": 0.317201166180758, "grad_norm": 0.1185341402888298, "learning_rate": 1e-06, "loss": 0.0028, "step": 34 }, { "clip_ratio/high_max": 0.002162621844036039, "clip_ratio/high_mean": 0.0008814250413706759, "clip_ratio/low_mean": 0.0007694131909374846, "clip_ratio/low_min": 6.902107179485029e-05, "clip_ratio/region_mean": 0.0016508382177562453, "epoch": 0.32653061224489793, "grad_norm": 0.1345488429069519, "learning_rate": 1e-06, "loss": 0.0151, "step": 35 }, { "clip_ratio/high_max": 0.00217103109753225, "clip_ratio/high_mean": 0.0009013333947223146, "clip_ratio/low_mean": 0.0008750349970796378, "clip_ratio/low_min": 7.320834902202478e-05, "clip_ratio/region_mean": 0.0017763684008968994, "epoch": 0.3358600583090379, "grad_norm": 0.1357390582561493, "learning_rate": 1e-06, "loss": 0.0539, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014787946428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 601.482177734375, "completions/mean_terminated_length": 549.0297241210938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.34518950437317786, "grad_norm": 0.12443603575229645, "learning_rate": 1e-06, "loss": 0.0129, "num_tokens": 23275334.0, "reward": 0.547433078289032, "reward_std": 0.2031458020210266, "rewards/simpleverify_reward/mean": 0.5474330186843872, "rewards/simpleverify_reward/std": 0.49781447649002075, "step": 37 }, { "clip_ratio/high_max": 0.0020158340703346767, "clip_ratio/high_mean": 0.0009003378299894393, "clip_ratio/low_mean": 0.0005252805431155139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014256184149417095, "epoch": 0.3545189504373178, "grad_norm": 0.12015597522258759, "learning_rate": 1e-06, "loss": -0.0026, "step": 38 }, { "clip_ratio/high_max": 0.002306257578311488, "clip_ratio/high_mean": 0.0008626532271591714, "clip_ratio/low_mean": 0.0006371898089128081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014998430124251172, "epoch": 0.3638483965014577, "grad_norm": 0.11119357496500015, "learning_rate": 1e-06, "loss": 0.014, "step": 39 }, { "clip_ratio/high_max": 0.0016719921841286123, "clip_ratio/high_mean": 0.0007360276958934264, "clip_ratio/low_mean": 0.0005806757471873425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013167034048819914, "epoch": 0.37317784256559766, "grad_norm": 0.11490615457296371, "learning_rate": 1e-06, "loss": -0.0146, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012834821428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 602.9305419921875, "completions/mean_terminated_length": 557.5147094726562, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.3825072886297376, "grad_norm": 0.12960845232009888, "learning_rate": 1e-06, "loss": 0.0401, "num_tokens": 25613133.0, "reward": 0.5465959906578064, "reward_std": 0.2000388205051422, "rewards/simpleverify_reward/mean": 0.5465959906578064, "rewards/simpleverify_reward/std": 0.4978935420513153, "step": 41 }, { "clip_ratio/high_max": 0.0017490498612460215, "clip_ratio/high_mean": 0.0007602175955980783, "clip_ratio/low_mean": 0.000608309639574145, "clip_ratio/low_min": 3.297085550002521e-05, "clip_ratio/region_mean": 0.0013685272169823293, "epoch": 0.39183673469387753, "grad_norm": 0.11739464104175568, "learning_rate": 1e-06, "loss": -0.0411, "step": 42 }, { "clip_ratio/high_max": 0.0017301726984442212, "clip_ratio/high_mean": 0.0007616511029482353, "clip_ratio/low_mean": 0.0006128217573859729, "clip_ratio/low_min": 1.3348995707929134e-05, "clip_ratio/region_mean": 0.001374472845782293, "epoch": 0.40116618075801747, "grad_norm": 0.120390385389328, "learning_rate": 1e-06, "loss": -0.0128, "step": 43 }, { "clip_ratio/high_max": 0.0015080733355716802, "clip_ratio/high_mean": 0.0007228094164020149, "clip_ratio/low_mean": 0.0007602541936648777, "clip_ratio/low_min": 8.885918850864982e-05, "clip_ratio/region_mean": 0.0014830636355327442, "epoch": 0.41049562682215746, "grad_norm": 0.1120561957359314, "learning_rate": 1e-06, "loss": 0.0463, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0170200892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3787.0, "completions/mean_length": 645.1046752929688, "completions/mean_terminated_length": 585.3530883789062, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.4198250728862974, "grad_norm": 0.13429218530654907, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 28036716.0, "reward": 0.5460379719734192, "reward_std": 0.2133796513080597, "rewards/simpleverify_reward/mean": 0.5460379719734192, "rewards/simpleverify_reward/std": 0.4979454278945923, "step": 45 }, { "clip_ratio/high_max": 0.0016349691850336967, "clip_ratio/high_mean": 0.0007246564136949019, "clip_ratio/low_mean": 0.0005982008542559925, "clip_ratio/low_min": 7.202498090919107e-05, "clip_ratio/region_mean": 0.0013228572606749367, "epoch": 0.4291545189504373, "grad_norm": 0.11848900467157364, "learning_rate": 1e-06, "loss": 0.0233, "step": 46 }, { "clip_ratio/high_max": 0.0020506835280684754, "clip_ratio/high_mean": 0.0008388634632865433, "clip_ratio/low_mean": 0.000498463000440097, "clip_ratio/low_min": 1.1376047041267157e-05, "clip_ratio/region_mean": 0.0013373264409892727, "epoch": 0.43848396501457726, "grad_norm": 0.11460186541080475, "learning_rate": 1e-06, "loss": -0.0216, "step": 47 }, { "clip_ratio/high_max": 0.0019764481512538623, "clip_ratio/high_mean": 0.0008296815722133033, "clip_ratio/low_mean": 0.0006423032500606496, "clip_ratio/low_min": 6.993252100073732e-05, "clip_ratio/region_mean": 0.0014719848040840589, "epoch": 0.4478134110787172, "grad_norm": 0.11370869725942612, "learning_rate": 1e-06, "loss": 0.014, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0220424107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 632.730224609375, "completions/mean_terminated_length": 554.6707763671875, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.45714285714285713, "grad_norm": 0.1177234873175621, "learning_rate": 1e-06, "loss": 0.0529, "num_tokens": 30327285.0, "reward": 0.5641741156578064, "reward_std": 0.21200869977474213, "rewards/simpleverify_reward/mean": 0.5641741156578064, "rewards/simpleverify_reward/std": 0.49593377113342285, "step": 49 }, { "clip_ratio/high_max": 0.002107020278344862, "clip_ratio/high_mean": 0.0008792382323008496, "clip_ratio/low_mean": 0.0004828867449759855, "clip_ratio/low_min": 3.669917168735992e-05, "clip_ratio/region_mean": 0.001362124974548351, "epoch": 0.46647230320699706, "grad_norm": 0.12459157407283783, "learning_rate": 1e-06, "loss": -0.0223, "step": 50 }, { "clip_ratio/high_max": 0.0024252113653346896, "clip_ratio/high_mean": 0.0008867855576681904, "clip_ratio/low_mean": 0.0006567912350874394, "clip_ratio/low_min": 5.138972483109683e-05, "clip_ratio/region_mean": 0.0015435767636517994, "epoch": 0.47580174927113705, "grad_norm": 0.21782147884368896, "learning_rate": 1e-06, "loss": 0.0298, "step": 51 }, { "clip_ratio/high_max": 0.002212096780567663, "clip_ratio/high_mean": 0.0008760514556342969, "clip_ratio/low_mean": 0.0006789399594708811, "clip_ratio/low_min": 0.00013323582243174314, "clip_ratio/region_mean": 0.0015549914314760827, "epoch": 0.485131195335277, "grad_norm": 0.12730556726455688, "learning_rate": 1e-06, "loss": -0.023, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 586.8599853515625, "completions/mean_terminated_length": 535.1964721679688, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.4944606413994169, "grad_norm": 0.12142479419708252, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 32578991.0, "reward": 0.5652902126312256, "reward_std": 0.1999187171459198, "rewards/simpleverify_reward/mean": 0.5652901530265808, "rewards/simpleverify_reward/std": 0.495788037776947, "step": 53 }, { "clip_ratio/high_max": 0.0019279623229522258, "clip_ratio/high_mean": 0.0008482047633151524, "clip_ratio/low_mean": 0.0006109664172981866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014591711696994025, "epoch": 0.5037900874635568, "grad_norm": 0.13373589515686035, "learning_rate": 1e-06, "loss": 0.0069, "step": 54 }, { "clip_ratio/high_max": 0.0017978162577492185, "clip_ratio/high_mean": 0.0007370194489340065, "clip_ratio/low_mean": 0.0006463261406679521, "clip_ratio/low_min": 1.0729613677540328e-05, "clip_ratio/region_mean": 0.001383345566864591, "epoch": 0.5131195335276968, "grad_norm": 0.10906954854726791, "learning_rate": 1e-06, "loss": 0.004, "step": 55 }, { "clip_ratio/high_max": 0.0020950143225491047, "clip_ratio/high_mean": 0.0007582559228467289, "clip_ratio/low_mean": 0.0006336569731502095, "clip_ratio/low_min": 1.2373787285469007e-05, "clip_ratio/region_mean": 0.0013919128832640126, "epoch": 0.5224489795918368, "grad_norm": 0.1295280009508133, "learning_rate": 1e-06, "loss": -0.0103, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012834821428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 582.744140625, "completions/mean_terminated_length": 537.0658569335938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.5317784256559767, "grad_norm": 0.13180360198020935, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 34834898.0, "reward": 0.58203125, "reward_std": 0.1870126873254776, "rewards/simpleverify_reward/mean": 0.58203125, "rewards/simpleverify_reward/std": 0.49329379200935364, "step": 57 }, { "clip_ratio/high_max": 0.0022463132081611548, "clip_ratio/high_mean": 0.0008565835141780553, "clip_ratio/low_mean": 0.0005203430887377181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001376926629745867, "epoch": 0.5411078717201167, "grad_norm": 0.11818147450685501, "learning_rate": 1e-06, "loss": 0.0003, "step": 58 }, { "clip_ratio/high_max": 0.001838752163166646, "clip_ratio/high_mean": 0.0007767736587993568, "clip_ratio/low_mean": 0.0005765245277871145, "clip_ratio/low_min": 2.0857667550444603e-05, "clip_ratio/region_mean": 0.0013532982047763653, "epoch": 0.5504373177842565, "grad_norm": 0.12041822075843811, "learning_rate": 1e-06, "loss": 0.0258, "step": 59 }, { "clip_ratio/high_max": 0.002170029307308141, "clip_ratio/high_mean": 0.000856021186336875, "clip_ratio/low_mean": 0.0007048611369100399, "clip_ratio/low_min": 3.024521083716536e-05, "clip_ratio/region_mean": 0.0015608822941430844, "epoch": 0.5597667638483965, "grad_norm": 0.11375053972005844, "learning_rate": 1e-06, "loss": 0.0169, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 635.732177734375, "completions/mean_terminated_length": 576.8172607421875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.5690962099125364, "grad_norm": 0.134700208902359, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 37204018.0, "reward": 0.564453125, "reward_std": 0.2176358848810196, "rewards/simpleverify_reward/mean": 0.564453125, "rewards/simpleverify_reward/std": 0.4958975613117218, "step": 61 }, { "clip_ratio/high_max": 0.0017510913094156422, "clip_ratio/high_mean": 0.0007842377654014854, "clip_ratio/low_mean": 0.0005905124808123219, "clip_ratio/low_min": 2.1529235709749628e-05, "clip_ratio/region_mean": 0.001374750219838461, "epoch": 0.5784256559766764, "grad_norm": 0.11428514868021011, "learning_rate": 1e-06, "loss": 0.0111, "step": 62 }, { "clip_ratio/high_max": 0.0022545032916241325, "clip_ratio/high_mean": 0.0009266319939342793, "clip_ratio/low_mean": 0.0007465720627806149, "clip_ratio/low_min": 0.00010408167872810736, "clip_ratio/region_mean": 0.0016732040458009578, "epoch": 0.5877551020408164, "grad_norm": 0.1252550333738327, "learning_rate": 1e-06, "loss": 0.0404, "step": 63 }, { "clip_ratio/high_max": 0.002187127567594871, "clip_ratio/high_mean": 0.000964181466770242, "clip_ratio/low_mean": 0.0006265091960813152, "clip_ratio/low_min": 1.544735459901858e-05, "clip_ratio/region_mean": 0.0015906906810414512, "epoch": 0.5970845481049563, "grad_norm": 0.11590372771024704, "learning_rate": 1e-06, "loss": -0.0192, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013950892857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 626.7238159179688, "completions/mean_terminated_length": 577.6395263671875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6064139941690962, "grad_norm": 0.1237572506070137, "learning_rate": 1e-06, "loss": 0.0352, "num_tokens": 39620908.0, "reward": 0.552734375, "reward_std": 0.1803591549396515, "rewards/simpleverify_reward/mean": 0.552734375, "rewards/simpleverify_reward/std": 0.49728068709373474, "step": 65 }, { "clip_ratio/high_max": 0.0018609155595186166, "clip_ratio/high_mean": 0.0008167100168066099, "clip_ratio/low_mean": 0.0005047277909397963, "clip_ratio/low_min": 6.032918827258982e-05, "clip_ratio/region_mean": 0.0013214377722761128, "epoch": 0.6157434402332361, "grad_norm": 0.11496131867170334, "learning_rate": 1e-06, "loss": -0.0189, "step": 66 }, { "clip_ratio/high_max": 0.0021445268794195727, "clip_ratio/high_mean": 0.0008168510103132576, "clip_ratio/low_mean": 0.0004740808226415538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012909318611491472, "epoch": 0.6250728862973761, "grad_norm": 0.11073864251375198, "learning_rate": 1e-06, "loss": -0.0222, "step": 67 }, { "clip_ratio/high_max": 0.002052243009529775, "clip_ratio/high_mean": 0.0008120777001749957, "clip_ratio/low_mean": 0.0006315992395684589, "clip_ratio/low_min": 2.7790129024651833e-05, "clip_ratio/region_mean": 0.001443676905182656, "epoch": 0.634402332361516, "grad_norm": 0.11889535188674927, "learning_rate": 1e-06, "loss": 0.0249, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3366.0, "completions/mean_length": 650.8088989257812, "completions/mean_terminated_length": 584.1782836914062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.643731778425656, "grad_norm": 0.13388052582740784, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 42052311.0, "reward": 0.54296875, "reward_std": 0.21882569789886475, "rewards/simpleverify_reward/mean": 0.54296875, "rewards/simpleverify_reward/std": 0.498219758272171, "step": 69 }, { "clip_ratio/high_max": 0.0021779544586024713, "clip_ratio/high_mean": 0.0008539274931536056, "clip_ratio/low_mean": 0.0006774065641366178, "clip_ratio/low_min": 9.412442796019604e-05, "clip_ratio/region_mean": 0.001531334055471234, "epoch": 0.6530612244897959, "grad_norm": 0.1265670210123062, "learning_rate": 1e-06, "loss": 0.0063, "step": 70 }, { "clip_ratio/high_max": 0.0019527288422978017, "clip_ratio/high_mean": 0.0008001440655789338, "clip_ratio/low_mean": 0.0007555261254310608, "clip_ratio/low_min": 4.7529333642160054e-05, "clip_ratio/region_mean": 0.0015556701800960582, "epoch": 0.6623906705539359, "grad_norm": 0.11567088961601257, "learning_rate": 1e-06, "loss": 0.0167, "step": 71 }, { "clip_ratio/high_max": 0.0019144151992804836, "clip_ratio/high_mean": 0.0006897433449921664, "clip_ratio/low_mean": 0.0006972121209400939, "clip_ratio/low_min": 4.6482977268169634e-05, "clip_ratio/region_mean": 0.0013869554350094404, "epoch": 0.6717201166180758, "grad_norm": 0.1153932437300682, "learning_rate": 1e-06, "loss": 0.0034, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0228794642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 689.320068359375, "completions/mean_terminated_length": 609.552001953125, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.6810495626822157, "grad_norm": 0.1190451979637146, "learning_rate": 1e-06, "loss": 0.0457, "num_tokens": 44551746.0, "reward": 0.5398995876312256, "reward_std": 0.2084798663854599, "rewards/simpleverify_reward/mean": 0.5398995280265808, "rewards/simpleverify_reward/std": 0.498475044965744, "step": 73 }, { "clip_ratio/high_max": 0.0018922849485534243, "clip_ratio/high_mean": 0.0008136382093653083, "clip_ratio/low_mean": 0.0005698631521227071, "clip_ratio/low_min": 7.587584423163207e-05, "clip_ratio/region_mean": 0.0013835013814968988, "epoch": 0.6903790087463557, "grad_norm": 0.12138821184635162, "learning_rate": 1e-06, "loss": 0.0173, "step": 74 }, { "clip_ratio/high_max": 0.0019175242705387063, "clip_ratio/high_mean": 0.0008661125211801846, "clip_ratio/low_mean": 0.0006915202075106208, "clip_ratio/low_min": 6.391561601049034e-05, "clip_ratio/region_mean": 0.0015576326550217345, "epoch": 0.6997084548104956, "grad_norm": 0.11854323744773865, "learning_rate": 1e-06, "loss": -0.0289, "step": 75 }, { "clip_ratio/high_max": 0.002158072245947551, "clip_ratio/high_mean": 0.0008361156833416317, "clip_ratio/low_mean": 0.0006968084344407544, "clip_ratio/low_min": 4.6026739255466964e-05, "clip_ratio/region_mean": 0.001532924117782386, "epoch": 0.7090379008746356, "grad_norm": 0.11603830009698868, "learning_rate": 1e-06, "loss": -0.0121, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0170200892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 617.605224609375, "completions/mean_terminated_length": 557.3775024414062, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.7183673469387755, "grad_norm": 0.12893222272396088, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 46872059.0, "reward": 0.5909598469734192, "reward_std": 0.2251289188861847, "rewards/simpleverify_reward/mean": 0.5909598469734192, "rewards/simpleverify_reward/std": 0.49172529578208923, "step": 77 }, { "clip_ratio/high_max": 0.002089931738737505, "clip_ratio/high_mean": 0.0009299955090682488, "clip_ratio/low_mean": 0.0006818136162110022, "clip_ratio/low_min": 1.7109225154854357e-05, "clip_ratio/region_mean": 0.0016118091662065126, "epoch": 0.7276967930029155, "grad_norm": 0.1304260790348053, "learning_rate": 1e-06, "loss": 0.0049, "step": 78 }, { "clip_ratio/high_max": 0.002175448287744075, "clip_ratio/high_mean": 0.0007941979638417251, "clip_ratio/low_mean": 0.0007702220427745488, "clip_ratio/low_min": 6.153531467134599e-05, "clip_ratio/region_mean": 0.001564420002978295, "epoch": 0.7370262390670554, "grad_norm": 0.12144004553556442, "learning_rate": 1e-06, "loss": 0.0299, "step": 79 }, { "clip_ratio/high_max": 0.0022716685489285737, "clip_ratio/high_mean": 0.0009308311528002378, "clip_ratio/low_mean": 0.0006350376406771829, "clip_ratio/low_min": 9.447933462070068e-05, "clip_ratio/region_mean": 0.0015658687880204525, "epoch": 0.7463556851311953, "grad_norm": 0.13412271440029144, "learning_rate": 1e-06, "loss": -0.0202, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 660.7857666015625, "completions/mean_terminated_length": 586.3626098632812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.7556851311953353, "grad_norm": 0.11035066097974777, "learning_rate": 1e-06, "loss": 0.0091, "num_tokens": 49299835.0, "reward": 0.5691964626312256, "reward_std": 0.2002212107181549, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.49525776505470276, "step": 81 }, { "clip_ratio/high_max": 0.002149599382391898, "clip_ratio/high_mean": 0.0008908363543014275, "clip_ratio/low_mean": 0.0005584163427556632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001449252711609006, "epoch": 0.7650145772594752, "grad_norm": 0.11991727352142334, "learning_rate": 1e-06, "loss": -0.0058, "step": 82 }, { "clip_ratio/high_max": 0.002075232783681713, "clip_ratio/high_mean": 0.0007903584391897311, "clip_ratio/low_mean": 0.0005651339361065766, "clip_ratio/low_min": 8.764986159803811e-05, "clip_ratio/region_mean": 0.0013554923752963077, "epoch": 0.7743440233236152, "grad_norm": 0.1208573505282402, "learning_rate": 1e-06, "loss": -0.0045, "step": 83 }, { "clip_ratio/high_max": 0.0021252884216664825, "clip_ratio/high_mean": 0.0008541398201487027, "clip_ratio/low_mean": 0.0006127889446361223, "clip_ratio/low_min": 4.072490628459491e-05, "clip_ratio/region_mean": 0.0014669287484139204, "epoch": 0.7836734693877551, "grad_norm": 0.12662363052368164, "learning_rate": 1e-06, "loss": 0.008, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013950892857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 606.450927734375, "completions/mean_terminated_length": 557.079833984375, "completions/min_length": 9.0, "completions/min_terminated_length": 9.0, "epoch": 0.793002915451895, "grad_norm": 0.11620554327964783, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 51605883.0, "reward": 0.609375, "reward_std": 0.18583808839321136, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48795855045318604, "step": 85 }, { "clip_ratio/high_max": 0.0018204297521151602, "clip_ratio/high_mean": 0.0007280359186552232, "clip_ratio/low_mean": 0.0006874073360449984, "clip_ratio/low_min": 7.102857307472732e-05, "clip_ratio/region_mean": 0.0014154432428767905, "epoch": 0.8023323615160349, "grad_norm": 0.1269119679927826, "learning_rate": 1e-06, "loss": 0.028, "step": 86 }, { "clip_ratio/high_max": 0.001923794043250382, "clip_ratio/high_mean": 0.0007847420547477668, "clip_ratio/low_mean": 0.0006287943560892018, "clip_ratio/low_min": 2.0719377062050626e-05, "clip_ratio/region_mean": 0.0014135363780951593, "epoch": 0.8116618075801749, "grad_norm": 0.10836216807365417, "learning_rate": 1e-06, "loss": -0.0136, "step": 87 }, { "clip_ratio/high_max": 0.002166200698411558, "clip_ratio/high_mean": 0.0008868271943356376, "clip_ratio/low_mean": 0.0006470726566476515, "clip_ratio/low_min": 3.0427335332205985e-05, "clip_ratio/region_mean": 0.001533899841888342, "epoch": 0.8209912536443149, "grad_norm": 0.12363235652446747, "learning_rate": 1e-06, "loss": -0.0103, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013950892857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 627.630859375, "completions/mean_terminated_length": 578.5594482421875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.8303206997084548, "grad_norm": 0.11391891539096832, "learning_rate": 1e-06, "loss": 0.0188, "num_tokens": 54014952.0, "reward": 0.576729953289032, "reward_std": 0.1839667111635208, "rewards/simpleverify_reward/mean": 0.5767298936843872, "rewards/simpleverify_reward/std": 0.4941463768482208, "step": 89 }, { "clip_ratio/high_max": 0.002055741810181644, "clip_ratio/high_mean": 0.0008208683684642892, "clip_ratio/low_mean": 0.000494132948006154, "clip_ratio/low_min": 1.7882690372061916e-05, "clip_ratio/region_mean": 0.001315001311013475, "epoch": 0.8396501457725948, "grad_norm": 0.12000273168087006, "learning_rate": 1e-06, "loss": -0.0142, "step": 90 }, { "clip_ratio/high_max": 0.0020362616960483138, "clip_ratio/high_mean": 0.0007605874980072258, "clip_ratio/low_mean": 0.0004993219481548294, "clip_ratio/low_min": 2.18855957427877e-05, "clip_ratio/region_mean": 0.0012599094370671082, "epoch": 0.8489795918367347, "grad_norm": 0.11531569808721542, "learning_rate": 1e-06, "loss": 0.0134, "step": 91 }, { "clip_ratio/high_max": 0.0019130878827127162, "clip_ratio/high_mean": 0.0007606530252814991, "clip_ratio/low_mean": 0.0005914706498515443, "clip_ratio/low_min": 2.5732286303536966e-05, "clip_ratio/region_mean": 0.0013521236833184958, "epoch": 0.8583090379008746, "grad_norm": 0.11356611549854279, "learning_rate": 1e-06, "loss": 0.0346, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017578125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 641.884765625, "completions/mean_terminated_length": 580.0814819335938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.8676384839650145, "grad_norm": 0.12858347594738007, "learning_rate": 1e-06, "loss": 0.0212, "num_tokens": 56402659.0, "reward": 0.584542453289032, "reward_std": 0.2170875370502472, "rewards/simpleverify_reward/mean": 0.5845423936843872, "rewards/simpleverify_reward/std": 0.49286949634552, "step": 93 }, { "clip_ratio/high_max": 0.0023062456457410008, "clip_ratio/high_mean": 0.0009602033478586236, "clip_ratio/low_mean": 0.0004867205898335669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014469239104073495, "epoch": 0.8769679300291545, "grad_norm": 0.12558914721012115, "learning_rate": 1e-06, "loss": -0.0049, "step": 94 }, { "clip_ratio/high_max": 0.002205582532042172, "clip_ratio/high_mean": 0.0008652425476611825, "clip_ratio/low_mean": 0.0006122402746768785, "clip_ratio/low_min": 1.236888965649996e-05, "clip_ratio/region_mean": 0.001477482834161492, "epoch": 0.8862973760932945, "grad_norm": 0.12085623294115067, "learning_rate": 1e-06, "loss": -0.0325, "step": 95 }, { "clip_ratio/high_max": 0.0019747444384847768, "clip_ratio/high_mean": 0.0008771742104727309, "clip_ratio/low_mean": 0.0007263326515385415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016035068547353148, "epoch": 0.8956268221574344, "grad_norm": 0.11808449029922485, "learning_rate": 1e-06, "loss": 0.0177, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 688.1138916015625, "completions/mean_terminated_length": 606.3245849609375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.9049562682215744, "grad_norm": 0.12350978702306747, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 58891499.0, "reward": 0.5398995876312256, "reward_std": 0.19640468060970306, "rewards/simpleverify_reward/mean": 0.5398995280265808, "rewards/simpleverify_reward/std": 0.498475044965744, "step": 97 }, { "clip_ratio/high_max": 0.0016800956218503416, "clip_ratio/high_mean": 0.0007059089421090903, "clip_ratio/low_mean": 0.0006728367316100048, "clip_ratio/low_min": 2.1477587324625347e-05, "clip_ratio/region_mean": 0.0013787456446152646, "epoch": 0.9142857142857143, "grad_norm": 0.12350565195083618, "learning_rate": 1e-06, "loss": 0.0138, "step": 98 }, { "clip_ratio/high_max": 0.0017839961728896014, "clip_ratio/high_mean": 0.0007016315776127158, "clip_ratio/low_mean": 0.00066240259911865, "clip_ratio/low_min": 4.669195641326951e-05, "clip_ratio/region_mean": 0.0013640341676364187, "epoch": 0.9236151603498542, "grad_norm": 0.11450343579053879, "learning_rate": 1e-06, "loss": 0.0366, "step": 99 }, { "clip_ratio/high_max": 0.001657637574680848, "clip_ratio/high_mean": 0.000750164341297932, "clip_ratio/low_mean": 0.000629274331004126, "clip_ratio/low_min": 2.1242623006401118e-05, "clip_ratio/region_mean": 0.0013794386577501427, "epoch": 0.9329446064139941, "grad_norm": 0.11214578151702881, "learning_rate": 1e-06, "loss": -0.0266, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 665.7156982421875, "completions/mean_terminated_length": 599.3734130859375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.9422740524781341, "grad_norm": 0.12263553589582443, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 61364736.0, "reward": 0.5555245876312256, "reward_std": 0.18389412760734558, "rewards/simpleverify_reward/mean": 0.5555245280265808, "rewards/simpleverify_reward/std": 0.496976763010025, "step": 101 }, { "clip_ratio/high_max": 0.0017071649635909125, "clip_ratio/high_mean": 0.0006859258764961851, "clip_ratio/low_mean": 0.0006258827816054691, "clip_ratio/low_min": 3.585561898944434e-05, "clip_ratio/region_mean": 0.0013118086208123714, "epoch": 0.9516034985422741, "grad_norm": 0.11153359711170197, "learning_rate": 1e-06, "loss": 0.0071, "step": 102 }, { "clip_ratio/high_max": 0.001448060054826783, "clip_ratio/high_mean": 0.0005969270132482052, "clip_ratio/low_mean": 0.0006210333394847112, "clip_ratio/low_min": 1.9797276763711125e-05, "clip_ratio/region_mean": 0.001217960332724033, "epoch": 0.960932944606414, "grad_norm": 0.11562177538871765, "learning_rate": 1e-06, "loss": 0.023, "step": 103 }, { "clip_ratio/high_max": 0.0017266605864278972, "clip_ratio/high_mean": 0.0006465664482675493, "clip_ratio/low_mean": 0.0006424028088076739, "clip_ratio/low_min": 5.594325739366468e-05, "clip_ratio/region_mean": 0.0012889692661701702, "epoch": 0.970262390670554, "grad_norm": 0.12908075749874115, "learning_rate": 1e-06, "loss": 0.0264, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 636.2199096679688, "completions/mean_terminated_length": 565.2904663085938, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.00932944606414, "grad_norm": 0.13502830266952515, "learning_rate": 1e-06, "loss": 0.0261, "num_tokens": 63734284.0, "reward": 0.5499442219734192, "reward_std": 0.19773021340370178, "rewards/simpleverify_reward/mean": 0.5499442219734192, "rewards/simpleverify_reward/std": 0.4975687265396118, "step": 105 }, { "clip_ratio/high_max": 0.002146747319784481, "clip_ratio/high_mean": 0.0008405951757595176, "clip_ratio/low_mean": 0.0005782945836472209, "clip_ratio/low_min": 5.368789697968168e-05, "clip_ratio/region_mean": 0.0014188897657732014, "epoch": 1.01865889212828, "grad_norm": 0.12813352048397064, "learning_rate": 1e-06, "loss": -0.0022, "step": 106 }, { "clip_ratio/high_max": 0.0018661808608158026, "clip_ratio/high_mean": 0.0007158625066949753, "clip_ratio/low_mean": 0.000625614273303654, "clip_ratio/low_min": 4.504991011344828e-05, "clip_ratio/region_mean": 0.0013414767890935764, "epoch": 1.0279883381924197, "grad_norm": 0.12012085318565369, "learning_rate": 1e-06, "loss": 0.0011, "step": 107 }, { "clip_ratio/high_max": 0.002116545925673563, "clip_ratio/high_mean": 0.0007658416325284634, "clip_ratio/low_mean": 0.0007744880167592783, "clip_ratio/low_min": 8.041860564844683e-05, "clip_ratio/region_mean": 0.0015403296201839112, "epoch": 1.0373177842565597, "grad_norm": 0.12095325440168381, "learning_rate": 1e-06, "loss": 0.0102, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0220424107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 649.0949096679688, "completions/mean_terminated_length": 571.404296875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 1.0466472303206997, "grad_norm": 0.10980932414531708, "learning_rate": 1e-06, "loss": -0.0134, "num_tokens": 66121624.0, "reward": 0.5558035969734192, "reward_std": 0.18948142230510712, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.4969455301761627, "step": 109 }, { "clip_ratio/high_max": 0.00165672010552953, "clip_ratio/high_mean": 0.000642104821963585, "clip_ratio/low_mean": 0.0005678119978256291, "clip_ratio/low_min": 2.153687091777101e-05, "clip_ratio/region_mean": 0.0012099168161512353, "epoch": 1.0559766763848397, "grad_norm": 0.128701850771904, "learning_rate": 1e-06, "loss": 0.0412, "step": 110 }, { "clip_ratio/high_max": 0.0017974528454942629, "clip_ratio/high_mean": 0.0007438106422341662, "clip_ratio/low_mean": 0.0006669154463452287, "clip_ratio/low_min": 5.243499435891863e-05, "clip_ratio/region_mean": 0.0014107261085882783, "epoch": 1.0653061224489795, "grad_norm": 0.1199413314461708, "learning_rate": 1e-06, "loss": -0.0031, "step": 111 }, { "clip_ratio/high_max": 0.0018923726383945905, "clip_ratio/high_mean": 0.0006902091818119516, "clip_ratio/low_mean": 0.0005874707112525357, "clip_ratio/low_min": 8.078900464170147e-05, "clip_ratio/region_mean": 0.0012776798721461091, "epoch": 1.0746355685131195, "grad_norm": 0.12488680332899094, "learning_rate": 1e-06, "loss": -0.0022, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 610.7863159179688, "completions/mean_terminated_length": 555.4654541015625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 1.0839650145772595, "grad_norm": 0.12968002259731293, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 68402242.0, "reward": 0.6286272406578064, "reward_std": 0.2003306895494461, "rewards/simpleverify_reward/mean": 0.6286272406578064, "rewards/simpleverify_reward/std": 0.4832392632961273, "step": 113 }, { "clip_ratio/high_max": 0.002221736685896758, "clip_ratio/high_mean": 0.0009351482058264082, "clip_ratio/low_mean": 0.0005353429314709501, "clip_ratio/low_min": 3.743189881788567e-05, "clip_ratio/region_mean": 0.0014704911300214007, "epoch": 1.0932944606413995, "grad_norm": 0.13403697311878204, "learning_rate": 1e-06, "loss": 0.022, "step": 114 }, { "clip_ratio/high_max": 0.0022223495980142616, "clip_ratio/high_mean": 0.0009615286835469306, "clip_ratio/low_mean": 0.0005003123160349787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014618409841204993, "epoch": 1.1026239067055394, "grad_norm": 0.13239029049873352, "learning_rate": 1e-06, "loss": -0.0139, "step": 115 }, { "clip_ratio/high_max": 0.00191305969201494, "clip_ratio/high_mean": 0.0007098073929228121, "clip_ratio/low_mean": 0.0005596039072770509, "clip_ratio/low_min": 2.725983995333081e-05, "clip_ratio/region_mean": 0.0012694112992903683, "epoch": 1.1119533527696792, "grad_norm": 0.11922561377286911, "learning_rate": 1e-06, "loss": 0.0244, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020368303571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 650.0960083007812, "completions/mean_terminated_length": 578.449462890625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 1.1212827988338192, "grad_norm": 0.11664435267448425, "learning_rate": 1e-06, "loss": 0.0615, "num_tokens": 70795674.0, "reward": 0.5828683376312256, "reward_std": 0.19722381234169006, "rewards/simpleverify_reward/mean": 0.5828682780265808, "rewards/simpleverify_reward/std": 0.49315381050109863, "step": 117 }, { "clip_ratio/high_max": 0.001989788426726591, "clip_ratio/high_mean": 0.0007818681160642882, "clip_ratio/low_mean": 0.0006176868764669052, "clip_ratio/low_min": 2.6337967938161455e-05, "clip_ratio/region_mean": 0.0013995550034451298, "epoch": 1.1306122448979592, "grad_norm": 0.1240810677409172, "learning_rate": 1e-06, "loss": 0.0068, "step": 118 }, { "clip_ratio/high_max": 0.0020033203072671313, "clip_ratio/high_mean": 0.0008266681643362972, "clip_ratio/low_mean": 0.0005502736385096796, "clip_ratio/low_min": 1.2794268513971474e-05, "clip_ratio/region_mean": 0.0013769417673756834, "epoch": 1.1399416909620992, "grad_norm": 0.12250246852636337, "learning_rate": 1e-06, "loss": -0.0347, "step": 119 }, { "clip_ratio/high_max": 0.0022079020527598914, "clip_ratio/high_mean": 0.000895160326763289, "clip_ratio/low_mean": 0.0005287443837005412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001423904723196756, "epoch": 1.149271137026239, "grad_norm": 0.12448569387197495, "learning_rate": 1e-06, "loss": -0.0088, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 626.9456176757812, "completions/mean_terminated_length": 571.8812255859375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.158600583090379, "grad_norm": 0.12393394857645035, "learning_rate": 1e-06, "loss": 0.0128, "num_tokens": 73164207.0, "reward": 0.5934709906578064, "reward_std": 0.1667875349521637, "rewards/simpleverify_reward/mean": 0.5934709906578064, "rewards/simpleverify_reward/std": 0.4912540018558502, "step": 121 }, { "clip_ratio/high_max": 0.0019104609455098398, "clip_ratio/high_mean": 0.0007301881696548662, "clip_ratio/low_mean": 0.00045579813377116807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011859863006975502, "epoch": 1.167930029154519, "grad_norm": 0.11453551799058914, "learning_rate": 1e-06, "loss": -0.0363, "step": 122 }, { "clip_ratio/high_max": 0.0017259078449569643, "clip_ratio/high_mean": 0.0006403512688848423, "clip_ratio/low_mean": 0.0006374911245075054, "clip_ratio/low_min": 4.7292845010815654e-05, "clip_ratio/region_mean": 0.0012778423879353795, "epoch": 1.177259475218659, "grad_norm": 0.10245633870363235, "learning_rate": 1e-06, "loss": 0.0246, "step": 123 }, { "clip_ratio/high_max": 0.0017372078000335023, "clip_ratio/high_mean": 0.0006538370798807591, "clip_ratio/low_mean": 0.0006406645061360905, "clip_ratio/low_min": 2.4895438400562853e-05, "clip_ratio/region_mean": 0.0012945015550940298, "epoch": 1.186588921282799, "grad_norm": 0.13707734644412994, "learning_rate": 1e-06, "loss": 0.0208, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022600446428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 664.97265625, "completions/mean_terminated_length": 585.6369018554688, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 1.1959183673469387, "grad_norm": 0.13986274600028992, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 75578997.0, "reward": 0.5627790689468384, "reward_std": 0.21183809638023376, "rewards/simpleverify_reward/mean": 0.5627790093421936, "rewards/simpleverify_reward/std": 0.4961123466491699, "step": 125 }, { "clip_ratio/high_max": 0.0020609042621799745, "clip_ratio/high_mean": 0.0009520197745587211, "clip_ratio/low_mean": 0.0005832706938235788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015352904811152257, "epoch": 1.2052478134110787, "grad_norm": 0.13114215433597565, "learning_rate": 1e-06, "loss": -0.0276, "step": 126 }, { "clip_ratio/high_max": 0.00218138204945717, "clip_ratio/high_mean": 0.0008601556810390321, "clip_ratio/low_mean": 0.0006370682367560221, "clip_ratio/low_min": 4.0355792407353874e-05, "clip_ratio/region_mean": 0.00149722391506657, "epoch": 1.2145772594752187, "grad_norm": 0.11253665387630463, "learning_rate": 1e-06, "loss": 0.0063, "step": 127 }, { "clip_ratio/high_max": 0.0019539271743269637, "clip_ratio/high_mean": 0.000799665336671751, "clip_ratio/low_mean": 0.0007466406968887895, "clip_ratio/low_min": 3.6188419471727684e-05, "clip_ratio/region_mean": 0.00154630600445671, "epoch": 1.2239067055393587, "grad_norm": 0.12482571601867676, "learning_rate": 1e-06, "loss": 0.0084, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017299107142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 608.3211669921875, "completions/mean_terminated_length": 546.92529296875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.2332361516034984, "grad_norm": 0.11050374805927277, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 77846916.0, "reward": 0.6163504719734192, "reward_std": 0.18100975453853607, "rewards/simpleverify_reward/mean": 0.6163504719734192, "rewards/simpleverify_reward/std": 0.48634201288223267, "step": 129 }, { "clip_ratio/high_max": 0.0017901970277307555, "clip_ratio/high_mean": 0.0007953565582283773, "clip_ratio/low_mean": 0.0004623936647476512, "clip_ratio/low_min": 3.079736870859051e-05, "clip_ratio/region_mean": 0.0012577502347994596, "epoch": 1.2425655976676384, "grad_norm": 0.12470415234565735, "learning_rate": 1e-06, "loss": -0.0062, "step": 130 }, { "clip_ratio/high_max": 0.002093502080242615, "clip_ratio/high_mean": 0.0008757366213103523, "clip_ratio/low_mean": 0.0005387423248066625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00141447893838631, "epoch": 1.2518950437317784, "grad_norm": 0.14253109693527222, "learning_rate": 1e-06, "loss": 0.0057, "step": 131 }, { "clip_ratio/high_max": 0.0018265173421241343, "clip_ratio/high_mean": 0.0008242255480581662, "clip_ratio/low_mean": 0.000605414561505313, "clip_ratio/low_min": 3.69647359548253e-05, "clip_ratio/region_mean": 0.0014296401350293308, "epoch": 1.2612244897959184, "grad_norm": 0.13475914299488068, "learning_rate": 1e-06, "loss": 0.034, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022600446428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 644.2299194335938, "completions/mean_terminated_length": 564.4144897460938, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.2705539358600584, "grad_norm": 0.13921624422073364, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 80189980.0, "reward": 0.5588728189468384, "reward_std": 0.1941133588552475, "rewards/simpleverify_reward/mean": 0.5588727593421936, "rewards/simpleverify_reward/std": 0.496591180562973, "step": 133 }, { "clip_ratio/high_max": 0.0018973786900460254, "clip_ratio/high_mean": 0.0007834825246391119, "clip_ratio/low_mean": 0.0005557729145948542, "clip_ratio/low_min": 3.321398253319785e-05, "clip_ratio/region_mean": 0.0013392554101301357, "epoch": 1.2798833819241984, "grad_norm": 0.12825220823287964, "learning_rate": 1e-06, "loss": -0.0047, "step": 134 }, { "clip_ratio/high_max": 0.00182367539673578, "clip_ratio/high_mean": 0.0008240480528911576, "clip_ratio/low_mean": 0.0006195009173097787, "clip_ratio/low_min": 1.3119227332936134e-05, "clip_ratio/region_mean": 0.0014435489938477986, "epoch": 1.2892128279883381, "grad_norm": 0.11681318283081055, "learning_rate": 1e-06, "loss": -0.0135, "step": 135 }, { "clip_ratio/high_max": 0.00197053136798786, "clip_ratio/high_mean": 0.0007122634469851619, "clip_ratio/low_mean": 0.0006770924883312546, "clip_ratio/low_min": 2.7889291231986135e-05, "clip_ratio/region_mean": 0.0013893559298594482, "epoch": 1.2985422740524781, "grad_norm": 0.1361992210149765, "learning_rate": 1e-06, "loss": 0.0323, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3458.0, "completions/mean_length": 669.5393676757812, "completions/mean_terminated_length": 583.2897338867188, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.3078717201166181, "grad_norm": 0.13055633008480072, "learning_rate": 1e-06, "loss": -0.0199, "num_tokens": 82601121.0, "reward": 0.53515625, "reward_std": 0.19403105974197388, "rewards/simpleverify_reward/mean": 0.53515625, "rewards/simpleverify_reward/std": 0.498832106590271, "step": 137 }, { "clip_ratio/high_max": 0.0017463456897530705, "clip_ratio/high_mean": 0.0008345675596501678, "clip_ratio/low_mean": 0.0005793403433926869, "clip_ratio/low_min": 9.563887033436913e-06, "clip_ratio/region_mean": 0.001413907899404876, "epoch": 1.3172011661807579, "grad_norm": 0.12187880277633667, "learning_rate": 1e-06, "loss": -0.0167, "step": 138 }, { "clip_ratio/high_max": 0.001732812452246435, "clip_ratio/high_mean": 0.0007392155112029286, "clip_ratio/low_mean": 0.0005025638402003096, "clip_ratio/low_min": 9.318428237747867e-05, "clip_ratio/region_mean": 0.0012417793586791959, "epoch": 1.3265306122448979, "grad_norm": 0.13114553689956665, "learning_rate": 1e-06, "loss": 0.0103, "step": 139 }, { "clip_ratio/high_max": 0.001725418129353784, "clip_ratio/high_mean": 0.0007963057050801581, "clip_ratio/low_mean": 0.0005837406661157729, "clip_ratio/low_min": 3.642354749899823e-05, "clip_ratio/region_mean": 0.001380046334816143, "epoch": 1.3358600583090379, "grad_norm": 0.12495388835668564, "learning_rate": 1e-06, "loss": 0.012, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 686.4746704101562, "completions/mean_terminated_length": 592.6344604492188, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.3451895043731779, "grad_norm": 0.12903298437595367, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 85042094.0, "reward": 0.568917453289032, "reward_std": 0.1997484564781189, "rewards/simpleverify_reward/mean": 0.5689173936843872, "rewards/simpleverify_reward/std": 0.4952967166900635, "step": 141 }, { "clip_ratio/high_max": 0.002009898620599415, "clip_ratio/high_mean": 0.0008419322257395834, "clip_ratio/low_mean": 0.0005393469368755177, "clip_ratio/low_min": 1.4806917533860542e-05, "clip_ratio/region_mean": 0.0013812791657983325, "epoch": 1.3545189504373178, "grad_norm": 0.12894636392593384, "learning_rate": 1e-06, "loss": -0.0471, "step": 142 }, { "clip_ratio/high_max": 0.001814288058085367, "clip_ratio/high_mean": 0.0007845879827073077, "clip_ratio/low_mean": 0.0007344760506384773, "clip_ratio/low_min": 8.359637467947323e-05, "clip_ratio/region_mean": 0.0015190640187938698, "epoch": 1.3638483965014578, "grad_norm": 0.12971647083759308, "learning_rate": 1e-06, "loss": 0.0485, "step": 143 }, { "clip_ratio/high_max": 0.0018520283920224756, "clip_ratio/high_mean": 0.0007502528460463509, "clip_ratio/low_mean": 0.0007543957835878246, "clip_ratio/low_min": 7.015404298726935e-05, "clip_ratio/region_mean": 0.0015046486660139635, "epoch": 1.3731778425655976, "grad_norm": 0.1311144232749939, "learning_rate": 1e-06, "loss": 0.0451, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026227678571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 669.9149169921875, "completions/mean_terminated_length": 577.6363525390625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.3825072886297376, "grad_norm": 0.12647980451583862, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 87419573.0, "reward": 0.5906808376312256, "reward_std": 0.19070059061050415, "rewards/simpleverify_reward/mean": 0.5906807780265808, "rewards/simpleverify_reward/std": 0.49177682399749756, "step": 145 }, { "clip_ratio/high_max": 0.0016104375499708112, "clip_ratio/high_mean": 0.0006907026945555117, "clip_ratio/low_mean": 0.0005369173786675674, "clip_ratio/low_min": 1.5504838302149437e-05, "clip_ratio/region_mean": 0.001227620088684489, "epoch": 1.3918367346938776, "grad_norm": 0.12101851403713226, "learning_rate": 1e-06, "loss": -0.0285, "step": 146 }, { "clip_ratio/high_max": 0.0021422184727271087, "clip_ratio/high_mean": 0.0007755116139378515, "clip_ratio/low_mean": 0.0007549517195002409, "clip_ratio/low_min": 4.6515358008036856e-05, "clip_ratio/region_mean": 0.0015304633197956719, "epoch": 1.4011661807580174, "grad_norm": 0.1280081570148468, "learning_rate": 1e-06, "loss": 0.0151, "step": 147 }, { "clip_ratio/high_max": 0.001843275113060372, "clip_ratio/high_mean": 0.0007460374654328916, "clip_ratio/low_mean": 0.0006548765095431008, "clip_ratio/low_min": 7.469627871614648e-05, "clip_ratio/region_mean": 0.0014009139376867097, "epoch": 1.4104956268221573, "grad_norm": 0.13672010600566864, "learning_rate": 1e-06, "loss": 0.0239, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 626.3482666015625, "completions/mean_terminated_length": 559.2445678710938, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.4198250728862973, "grad_norm": 0.11989639699459076, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 89726437.0, "reward": 0.6202567219734192, "reward_std": 0.1686181277036667, "rewards/simpleverify_reward/mean": 0.6202567219734192, "rewards/simpleverify_reward/std": 0.4853906035423279, "step": 149 }, { "clip_ratio/high_max": 0.0014615096406487282, "clip_ratio/high_mean": 0.000621973245870322, "clip_ratio/low_mean": 0.0005229199396126205, "clip_ratio/low_min": 1.581877950229682e-05, "clip_ratio/region_mean": 0.0011448931945778895, "epoch": 1.4291545189504373, "grad_norm": 0.12966448068618774, "learning_rate": 1e-06, "loss": 0.0006, "step": 150 }, { "clip_ratio/high_max": 0.0017544885158713441, "clip_ratio/high_mean": 0.0006567374457517872, "clip_ratio/low_mean": 0.0005475313646456925, "clip_ratio/low_min": 2.7210713597014546e-05, "clip_ratio/region_mean": 0.001204268821311416, "epoch": 1.4384839650145773, "grad_norm": 0.10042349994182587, "learning_rate": 1e-06, "loss": -0.0153, "step": 151 }, { "clip_ratio/high_max": 0.0016186857119464548, "clip_ratio/high_mean": 0.0005979871793897473, "clip_ratio/low_mean": 0.000573672958125826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011716601256921422, "epoch": 1.4478134110787173, "grad_norm": 0.1346733570098877, "learning_rate": 1e-06, "loss": 0.0192, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 668.036865234375, "completions/mean_terminated_length": 597.7597045898438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.457142857142857, "grad_norm": 0.11754105240106583, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 92175977.0, "reward": 0.5775669813156128, "reward_std": 0.18889792263507843, "rewards/simpleverify_reward/mean": 0.5775669813156128, "rewards/simpleverify_reward/std": 0.494015634059906, "step": 153 }, { "clip_ratio/high_max": 0.0015176600354607217, "clip_ratio/high_mean": 0.0007133628023439087, "clip_ratio/low_mean": 0.0005661946415784769, "clip_ratio/low_min": 3.7835651710338425e-05, "clip_ratio/region_mean": 0.001279557454836322, "epoch": 1.466472303206997, "grad_norm": 0.12351874262094498, "learning_rate": 1e-06, "loss": 0.0424, "step": 154 }, { "clip_ratio/high_max": 0.0016789777546364348, "clip_ratio/high_mean": 0.0006935889114174643, "clip_ratio/low_mean": 0.0005555534808081575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012491423840401694, "epoch": 1.475801749271137, "grad_norm": 0.12182097882032394, "learning_rate": 1e-06, "loss": 0.0112, "step": 155 }, { "clip_ratio/high_max": 0.001704990350845037, "clip_ratio/high_mean": 0.000691753055434674, "clip_ratio/low_mean": 0.0005499148146554944, "clip_ratio/low_min": 4.431576599017717e-05, "clip_ratio/region_mean": 0.0012416678873705678, "epoch": 1.485131195335277, "grad_norm": 0.11938410252332687, "learning_rate": 1e-06, "loss": 0.0096, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019252232142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 662.9623413085938, "completions/mean_terminated_length": 595.5712890625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 1.4944606413994168, "grad_norm": 0.1341061294078827, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 94638962.0, "reward": 0.5831473469734192, "reward_std": 0.19836531579494476, "rewards/simpleverify_reward/mean": 0.5831473469734192, "rewards/simpleverify_reward/std": 0.493106871843338, "step": 157 }, { "clip_ratio/high_max": 0.0020496412289503496, "clip_ratio/high_mean": 0.0007570923135062912, "clip_ratio/low_mean": 0.000569339053981821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001326431371126091, "epoch": 1.5037900874635568, "grad_norm": 0.12687119841575623, "learning_rate": 1e-06, "loss": -0.0099, "step": 158 }, { "clip_ratio/high_max": 0.0019752607186092064, "clip_ratio/high_mean": 0.0008088964132184628, "clip_ratio/low_mean": 0.0005700109377357876, "clip_ratio/low_min": 7.456675211869879e-05, "clip_ratio/region_mean": 0.0013789073454972822, "epoch": 1.5131195335276968, "grad_norm": 0.145109623670578, "learning_rate": 1e-06, "loss": 0.0024, "step": 159 }, { "clip_ratio/high_max": 0.0018405253758828621, "clip_ratio/high_mean": 0.0007701144731981913, "clip_ratio/low_mean": 0.0006262711485760519, "clip_ratio/low_min": 2.7482413315738086e-05, "clip_ratio/region_mean": 0.0013963856472400948, "epoch": 1.5224489795918368, "grad_norm": 0.12432614713907242, "learning_rate": 1e-06, "loss": 0.0163, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 638.3697509765625, "completions/mean_terminated_length": 569.4926147460938, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.5317784256559768, "grad_norm": 0.13684342801570892, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 97001999.0, "reward": 0.5731027126312256, "reward_std": 0.18581697344779968, "rewards/simpleverify_reward/mean": 0.5731026530265808, "rewards/simpleverify_reward/std": 0.49469614028930664, "step": 161 }, { "clip_ratio/high_max": 0.0017107110834331252, "clip_ratio/high_mean": 0.0007434064700646559, "clip_ratio/low_mean": 0.0005995759966026526, "clip_ratio/low_min": 6.97951836627908e-05, "clip_ratio/region_mean": 0.001342982483038213, "epoch": 1.5411078717201168, "grad_norm": 0.1381772756576538, "learning_rate": 1e-06, "loss": -0.0046, "step": 162 }, { "clip_ratio/high_max": 0.001652863411436556, "clip_ratio/high_mean": 0.0007244071894092485, "clip_ratio/low_mean": 0.000549788794160122, "clip_ratio/low_min": 2.745796064118622e-05, "clip_ratio/region_mean": 0.0012741959690174554, "epoch": 1.5504373177842565, "grad_norm": 0.1193263828754425, "learning_rate": 1e-06, "loss": 0.0263, "step": 163 }, { "clip_ratio/high_max": 0.0016289952473016456, "clip_ratio/high_mean": 0.0007454929136656574, "clip_ratio/low_mean": 0.0006348715701278707, "clip_ratio/low_min": 4.399888621264836e-05, "clip_ratio/region_mean": 0.0013803645051666535, "epoch": 1.5597667638483965, "grad_norm": 0.12108853459358215, "learning_rate": 1e-06, "loss": -0.0128, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024274553571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 685.2196044921875, "completions/mean_terminated_length": 600.3646240234375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.5690962099125363, "grad_norm": 0.1248461902141571, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 99453874.0, "reward": 0.5837053656578064, "reward_std": 0.19259242713451385, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.4930124282836914, "step": 165 }, { "clip_ratio/high_max": 0.0017936950462171808, "clip_ratio/high_mean": 0.0007493952616641764, "clip_ratio/low_mean": 0.000692337116561248, "clip_ratio/low_min": 1.581877950229682e-05, "clip_ratio/region_mean": 0.001441732354578562, "epoch": 1.5784256559766763, "grad_norm": 0.1370670646429062, "learning_rate": 1e-06, "loss": -0.0041, "step": 166 }, { "clip_ratio/high_max": 0.002089113819238264, "clip_ratio/high_mean": 0.0007637384860572638, "clip_ratio/low_mean": 0.0005933238799116225, "clip_ratio/low_min": 1.3061650861345697e-05, "clip_ratio/region_mean": 0.0013570623377745505, "epoch": 1.5877551020408163, "grad_norm": 0.13718299567699432, "learning_rate": 1e-06, "loss": -0.018, "step": 167 }, { "clip_ratio/high_max": 0.0019731895154109225, "clip_ratio/high_mean": 0.0007252031227835687, "clip_ratio/low_mean": 0.0007885386166890385, "clip_ratio/low_min": 6.892225428600796e-05, "clip_ratio/region_mean": 0.0015137417867663316, "epoch": 1.5970845481049563, "grad_norm": 0.1218254342675209, "learning_rate": 1e-06, "loss": 0.0359, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020368303571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 660.7037353515625, "completions/mean_terminated_length": 589.2777099609375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.6064139941690962, "grad_norm": 0.13127924501895905, "learning_rate": 1e-06, "loss": 0.0386, "num_tokens": 101893708.0, "reward": 0.5948660969734192, "reward_std": 0.1907694935798645, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.4909864366054535, "step": 169 }, { "clip_ratio/high_max": 0.0017669667431619018, "clip_ratio/high_mean": 0.0007627780778420856, "clip_ratio/low_mean": 0.0005881269198653172, "clip_ratio/low_min": 4.333893139119027e-05, "clip_ratio/region_mean": 0.001350905018625781, "epoch": 1.6157434402332362, "grad_norm": 0.1295381337404251, "learning_rate": 1e-06, "loss": -0.0142, "step": 170 }, { "clip_ratio/high_max": 0.0020220784936100245, "clip_ratio/high_mean": 0.0008242045823863009, "clip_ratio/low_mean": 0.0005255541727819946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001349758753349306, "epoch": 1.6250728862973762, "grad_norm": 0.13337182998657227, "learning_rate": 1e-06, "loss": 0.0355, "step": 171 }, { "clip_ratio/high_max": 0.0019956167052441742, "clip_ratio/high_mean": 0.0007843799403417506, "clip_ratio/low_mean": 0.0004926284254906932, "clip_ratio/low_min": 1.0382059372204822e-05, "clip_ratio/region_mean": 0.001277008337638108, "epoch": 1.634402332361516, "grad_norm": 0.11998724192380905, "learning_rate": 1e-06, "loss": -0.024, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3164.0, "completions/mean_length": 651.7453002929688, "completions/mean_terminated_length": 583.1345825195312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.643731778425656, "grad_norm": 0.13196328282356262, "learning_rate": 1e-06, "loss": 0.0268, "num_tokens": 104289403.0, "reward": 0.5906808376312256, "reward_std": 0.19382086396217346, "rewards/simpleverify_reward/mean": 0.5906807780265808, "rewards/simpleverify_reward/std": 0.49177682399749756, "step": 173 }, { "clip_ratio/high_max": 0.0018032177104032598, "clip_ratio/high_mean": 0.0007370746789092664, "clip_ratio/low_mean": 0.0006355587429425213, "clip_ratio/low_min": 1.6434394638054073e-05, "clip_ratio/region_mean": 0.0013726334400416818, "epoch": 1.6530612244897958, "grad_norm": 0.13487084209918976, "learning_rate": 1e-06, "loss": 0.0133, "step": 174 }, { "clip_ratio/high_max": 0.0016957447624008637, "clip_ratio/high_mean": 0.000710726540091855, "clip_ratio/low_mean": 0.0006508322912850417, "clip_ratio/low_min": 6.102254701545462e-05, "clip_ratio/region_mean": 0.0013615588432003278, "epoch": 1.6623906705539357, "grad_norm": 0.14557676017284393, "learning_rate": 1e-06, "loss": 0.013, "step": 175 }, { "clip_ratio/high_max": 0.0016970648357528262, "clip_ratio/high_mean": 0.0007375629847956588, "clip_ratio/low_mean": 0.0006402967828762485, "clip_ratio/low_min": 2.2856430405227002e-05, "clip_ratio/region_mean": 0.0013778597894997802, "epoch": 1.6717201166180757, "grad_norm": 0.12357483804225922, "learning_rate": 1e-06, "loss": 0.0052, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 666.0809326171875, "completions/mean_terminated_length": 569.657470703125, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 1.6810495626822157, "grad_norm": 0.13427303731441498, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 106645261.0, "reward": 0.6007254719734192, "reward_std": 0.19296862185001373, "rewards/simpleverify_reward/mean": 0.6007254719734192, "rewards/simpleverify_reward/std": 0.48981767892837524, "step": 177 }, { "clip_ratio/high_max": 0.0019654395800898783, "clip_ratio/high_mean": 0.000840193517433363, "clip_ratio/low_mean": 0.0006636629414060735, "clip_ratio/low_min": 0.00010452401329530403, "clip_ratio/region_mean": 0.0015038564706628677, "epoch": 1.6903790087463557, "grad_norm": 1.0433558225631714, "learning_rate": 1e-06, "loss": -0.0219, "step": 178 }, { "clip_ratio/high_max": 0.0021515010612347396, "clip_ratio/high_mean": 0.000842546580315684, "clip_ratio/low_mean": 0.0005846771764481673, "clip_ratio/low_min": 1.2838948350690771e-05, "clip_ratio/region_mean": 0.0014272237749537453, "epoch": 1.6997084548104957, "grad_norm": 0.13866542279720306, "learning_rate": 1e-06, "loss": 0.0202, "step": 179 }, { "clip_ratio/high_max": 0.002061309991404414, "clip_ratio/high_mean": 0.0008895468818082009, "clip_ratio/low_mean": 0.0006999277502472978, "clip_ratio/low_min": 1.3685132216778584e-05, "clip_ratio/region_mean": 0.0015894745956757106, "epoch": 1.7090379008746357, "grad_norm": 0.12858909368515015, "learning_rate": 1e-06, "loss": 0.0054, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023716517857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 666.3582763671875, "completions/mean_terminated_length": 583.0431518554688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 1.7183673469387755, "grad_norm": 0.13009749352931976, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 109043553.0, "reward": 0.594308078289032, "reward_std": 0.17664796113967896, "rewards/simpleverify_reward/mean": 0.5943080186843872, "rewards/simpleverify_reward/std": 0.4910939633846283, "step": 181 }, { "clip_ratio/high_max": 0.0014811085748078767, "clip_ratio/high_mean": 0.0006185811926116003, "clip_ratio/low_mean": 0.0005099131903989473, "clip_ratio/low_min": 1.6062709619291127e-05, "clip_ratio/region_mean": 0.0011284944175713463, "epoch": 1.7276967930029155, "grad_norm": 0.12047559767961502, "learning_rate": 1e-06, "loss": 0.0188, "step": 182 }, { "clip_ratio/high_max": 0.0018661204085219651, "clip_ratio/high_mean": 0.0007471864810213447, "clip_ratio/low_mean": 0.0005852505269103858, "clip_ratio/low_min": 4.3043788537033834e-05, "clip_ratio/region_mean": 0.0013324370156624354, "epoch": 1.7370262390670554, "grad_norm": 0.14119189977645874, "learning_rate": 1e-06, "loss": 0.0184, "step": 183 }, { "clip_ratio/high_max": 0.0015606831475452054, "clip_ratio/high_mean": 0.0007254519186972175, "clip_ratio/low_mean": 0.00045370148654910736, "clip_ratio/low_min": 2.6569828150968533e-05, "clip_ratio/region_mean": 0.0011791534270741977, "epoch": 1.7463556851311952, "grad_norm": 0.12161746621131897, "learning_rate": 1e-06, "loss": -0.0392, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 708.2813110351562, "completions/mean_terminated_length": 599.0, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.7556851311953352, "grad_norm": 0.12788978219032288, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 111486121.0, "reward": 0.59765625, "reward_std": 0.16989926993846893, "rewards/simpleverify_reward/mean": 0.59765625, "rewards/simpleverify_reward/std": 0.49043893814086914, "step": 185 }, { "clip_ratio/high_max": 0.0015723296637588646, "clip_ratio/high_mean": 0.0006500621584564215, "clip_ratio/low_mean": 0.0006356569501804188, "clip_ratio/low_min": 6.814106927777175e-05, "clip_ratio/region_mean": 0.0012857191104558297, "epoch": 1.7650145772594752, "grad_norm": 0.12230674922466278, "learning_rate": 1e-06, "loss": 0.0331, "step": 186 }, { "clip_ratio/high_max": 0.0015478206041734666, "clip_ratio/high_mean": 0.0005984209547023056, "clip_ratio/low_mean": 0.00057901639775082, "clip_ratio/low_min": 3.727308376255678e-05, "clip_ratio/region_mean": 0.0011774373560911044, "epoch": 1.7743440233236152, "grad_norm": 0.12837786972522736, "learning_rate": 1e-06, "loss": 0.0073, "step": 187 }, { "clip_ratio/high_max": 0.00176864864261006, "clip_ratio/high_mean": 0.0007771791097184177, "clip_ratio/low_mean": 0.0005722846381104318, "clip_ratio/low_min": 2.792048326227814e-05, "clip_ratio/region_mean": 0.0013494637278199662, "epoch": 1.7836734693877552, "grad_norm": 0.12429434806108475, "learning_rate": 1e-06, "loss": 0.008, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030970982142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 692.9071044921875, "completions/mean_terminated_length": 584.1414184570312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.7930029154518952, "grad_norm": 0.149130716919899, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 113860828.0, "reward": 0.5834263563156128, "reward_std": 0.18149735033512115, "rewards/simpleverify_reward/mean": 0.5834263563156128, "rewards/simpleverify_reward/std": 0.4930597245693207, "step": 189 }, { "clip_ratio/high_max": 0.001561344131914666, "clip_ratio/high_mean": 0.0006195683026817278, "clip_ratio/low_mean": 0.0006000239682180109, "clip_ratio/low_min": 5.3205181757221e-05, "clip_ratio/region_mean": 0.0012195922863611486, "epoch": 1.802332361516035, "grad_norm": 0.1277225911617279, "learning_rate": 1e-06, "loss": 0.0039, "step": 190 }, { "clip_ratio/high_max": 0.0015845374582568184, "clip_ratio/high_mean": 0.0007021065066510346, "clip_ratio/low_mean": 0.0006249984962778399, "clip_ratio/low_min": 5.3568110160995275e-05, "clip_ratio/region_mean": 0.0013271050120238215, "epoch": 1.811661807580175, "grad_norm": 0.12438131868839264, "learning_rate": 1e-06, "loss": -0.0091, "step": 191 }, { "clip_ratio/high_max": 0.001738724451570306, "clip_ratio/high_mean": 0.0007851965365262004, "clip_ratio/low_mean": 0.0006543963500007521, "clip_ratio/low_min": 4.059447019244544e-05, "clip_ratio/region_mean": 0.0014395928701560479, "epoch": 1.820991253644315, "grad_norm": 0.13107848167419434, "learning_rate": 1e-06, "loss": -0.027, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 717.4397583007812, "completions/mean_terminated_length": 594.3342895507812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.8303206997084547, "grad_norm": 0.13714130222797394, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 116275820.0, "reward": 0.5457589626312256, "reward_std": 0.19115445017814636, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.49797120690345764, "step": 193 }, { "clip_ratio/high_max": 0.0018565903847047593, "clip_ratio/high_mean": 0.0007656851903448114, "clip_ratio/low_mean": 0.0005296764875311055, "clip_ratio/low_min": 5.0935614126501605e-05, "clip_ratio/region_mean": 0.0012953616933373269, "epoch": 1.8396501457725947, "grad_norm": 0.13725395500659943, "learning_rate": 1e-06, "loss": -0.0392, "step": 194 }, { "clip_ratio/high_max": 0.0018346947472309694, "clip_ratio/high_mean": 0.0006727495656377869, "clip_ratio/low_mean": 0.0006248532672543661, "clip_ratio/low_min": 3.393262068129843e-05, "clip_ratio/region_mean": 0.0012976028701814357, "epoch": 1.8489795918367347, "grad_norm": 0.13091763854026794, "learning_rate": 1e-06, "loss": 0.0255, "step": 195 }, { "clip_ratio/high_max": 0.0015907511333352886, "clip_ratio/high_mean": 0.0006998878343438264, "clip_ratio/low_mean": 0.0006731574903824367, "clip_ratio/low_min": 4.0652675124874804e-05, "clip_ratio/region_mean": 0.001373045342916157, "epoch": 1.8583090379008746, "grad_norm": 0.1437220722436905, "learning_rate": 1e-06, "loss": 0.0339, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 695.9361572265625, "completions/mean_terminated_length": 590.2954711914062, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 1.8676384839650146, "grad_norm": 0.15065987408161163, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 118671567.0, "reward": 0.5954241156578064, "reward_std": 0.20062178373336792, "rewards/simpleverify_reward/mean": 0.5954241156578064, "rewards/simpleverify_reward/std": 0.49087825417518616, "step": 197 }, { "clip_ratio/high_max": 0.001950680551090045, "clip_ratio/high_mean": 0.0007641898901056265, "clip_ratio/low_mean": 0.000525693925737869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001289883795834612, "epoch": 1.8769679300291546, "grad_norm": 0.1336461901664734, "learning_rate": 1e-06, "loss": -0.0042, "step": 198 }, { "clip_ratio/high_max": 0.0018980813038069755, "clip_ratio/high_mean": 0.0007862519414629787, "clip_ratio/low_mean": 0.0006269449986575637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014131969037407544, "epoch": 1.8862973760932946, "grad_norm": 0.14158329367637634, "learning_rate": 1e-06, "loss": 0.0034, "step": 199 }, { "clip_ratio/high_max": 0.001594455876329448, "clip_ratio/high_mean": 0.0007565400810563006, "clip_ratio/low_mean": 0.0005681244783772854, "clip_ratio/low_min": 2.4059419047262054e-05, "clip_ratio/region_mean": 0.0013246645430626813, "epoch": 1.8956268221574344, "grad_norm": 0.1265958547592163, "learning_rate": 1e-06, "loss": -0.0027, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 703.5999145507812, "completions/mean_terminated_length": 608.231201171875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.9049562682215744, "grad_norm": 0.13805460929870605, "learning_rate": 1e-06, "loss": -0.0625, "num_tokens": 121147701.0, "reward": 0.5817522406578064, "reward_std": 0.1962631642818451, "rewards/simpleverify_reward/mean": 0.5817522406578064, "rewards/simpleverify_reward/std": 0.4933401346206665, "step": 201 }, { "clip_ratio/high_max": 0.0018138790255761705, "clip_ratio/high_mean": 0.0007107004093995783, "clip_ratio/low_mean": 0.0005195626636123052, "clip_ratio/low_min": 2.9420039027172606e-05, "clip_ratio/region_mean": 0.0012302630384510849, "epoch": 1.9142857142857141, "grad_norm": 0.12585294246673584, "learning_rate": 1e-06, "loss": 0.0398, "step": 202 }, { "clip_ratio/high_max": 0.0015562682492600288, "clip_ratio/high_mean": 0.0006897802522871643, "clip_ratio/low_mean": 0.0007110645547072636, "clip_ratio/low_min": 9.128165766014718e-05, "clip_ratio/region_mean": 0.0014008447906235233, "epoch": 1.9236151603498541, "grad_norm": 0.12557384371757507, "learning_rate": 1e-06, "loss": 0.0215, "step": 203 }, { "clip_ratio/high_max": 0.0019606529676821083, "clip_ratio/high_mean": 0.0007750306294838083, "clip_ratio/low_mean": 0.000631870349025121, "clip_ratio/low_min": 1.8200349586550146e-05, "clip_ratio/region_mean": 0.0014069009812374134, "epoch": 1.9329446064139941, "grad_norm": 0.13822881877422333, "learning_rate": 1e-06, "loss": 0.0127, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 668.6219482421875, "completions/mean_terminated_length": 586.3648681640625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 1.9422740524781341, "grad_norm": 0.1455037146806717, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 123548738.0, "reward": 0.6213728189468384, "reward_std": 0.19923119246959686, "rewards/simpleverify_reward/mean": 0.6213727593421936, "rewards/simpleverify_reward/std": 0.4851126968860626, "step": 205 }, { "clip_ratio/high_max": 0.002131566157913767, "clip_ratio/high_mean": 0.000842039160488639, "clip_ratio/low_mean": 0.0006112156179369777, "clip_ratio/low_min": 6.622447835979983e-05, "clip_ratio/region_mean": 0.001453254786611069, "epoch": 1.951603498542274, "grad_norm": 0.1438569873571396, "learning_rate": 1e-06, "loss": -0.0087, "step": 206 }, { "clip_ratio/high_max": 0.0019175705856468994, "clip_ratio/high_mean": 0.000828225742225186, "clip_ratio/low_mean": 0.0006762893153791083, "clip_ratio/low_min": 9.126752047450282e-06, "clip_ratio/region_mean": 0.0015045150285004638, "epoch": 1.960932944606414, "grad_norm": 0.13895376026630402, "learning_rate": 1e-06, "loss": 0.0218, "step": 207 }, { "clip_ratio/high_max": 0.0017563573055667803, "clip_ratio/high_mean": 0.0007487716611649375, "clip_ratio/low_mean": 0.0006586269228137098, "clip_ratio/low_min": 1.5838824765523896e-05, "clip_ratio/region_mean": 0.0014073985876166262, "epoch": 1.970262390670554, "grad_norm": 0.13894550502300262, "learning_rate": 1e-06, "loss": 0.0145, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025111607142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 689.1981201171875, "completions/mean_terminated_length": 601.4441528320312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 2.00932944606414, "grad_norm": 0.14317025244235992, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 126013360.0, "reward": 0.5775669813156128, "reward_std": 0.1968931257724762, "rewards/simpleverify_reward/mean": 0.5775669813156128, "rewards/simpleverify_reward/std": 0.494015634059906, "step": 209 }, { "clip_ratio/high_max": 0.001550738059449941, "clip_ratio/high_mean": 0.0006912295739311958, "clip_ratio/low_mean": 0.0005871282378393516, "clip_ratio/low_min": 3.506639404804446e-05, "clip_ratio/region_mean": 0.0012783578531525563, "epoch": 2.01865889212828, "grad_norm": 0.13387776911258698, "learning_rate": 1e-06, "loss": 0.0015, "step": 210 }, { "clip_ratio/high_max": 0.0020829075438086875, "clip_ratio/high_mean": 0.0007961640149005689, "clip_ratio/low_mean": 0.0006532982479257043, "clip_ratio/low_min": 4.993517450202489e-05, "clip_ratio/region_mean": 0.001449462266464252, "epoch": 2.02798833819242, "grad_norm": 0.13327153027057648, "learning_rate": 1e-06, "loss": -0.0265, "step": 211 }, { "clip_ratio/high_max": 0.0019816491039819084, "clip_ratio/high_mean": 0.0007523557342210552, "clip_ratio/low_mean": 0.0006888250009069452, "clip_ratio/low_min": 5.865732418897096e-05, "clip_ratio/region_mean": 0.0014411807169381063, "epoch": 2.03731778425656, "grad_norm": 0.1390068382024765, "learning_rate": 1e-06, "loss": 0.0237, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.027064732142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 686.2637329101562, "completions/mean_terminated_length": 591.4129638671875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.0466472303206995, "grad_norm": 0.14716660976409912, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 128437057.0, "reward": 0.6068638563156128, "reward_std": 0.1875499039888382, "rewards/simpleverify_reward/mean": 0.6068638563156128, "rewards/simpleverify_reward/std": 0.48851478099823, "step": 213 }, { "clip_ratio/high_max": 0.0020143745314271655, "clip_ratio/high_mean": 0.0007835138403606834, "clip_ratio/low_mean": 0.0005380199963838095, "clip_ratio/low_min": 1.4521375305776019e-05, "clip_ratio/region_mean": 0.0013215338294685353, "epoch": 2.0559766763848395, "grad_norm": 0.1282065212726593, "learning_rate": 1e-06, "loss": 0.0439, "step": 214 }, { "clip_ratio/high_max": 0.0018602317250042688, "clip_ratio/high_mean": 0.000792966699009412, "clip_ratio/low_mean": 0.0005655530549120158, "clip_ratio/low_min": 1.725565925880801e-05, "clip_ratio/region_mean": 0.0013585197484644596, "epoch": 2.0653061224489795, "grad_norm": 0.1371399313211441, "learning_rate": 1e-06, "loss": 0.0011, "step": 215 }, { "clip_ratio/high_max": 0.001978111577045638, "clip_ratio/high_mean": 0.000836518327560043, "clip_ratio/low_mean": 0.0006739635018675472, "clip_ratio/low_min": 3.91881894756807e-05, "clip_ratio/region_mean": 0.0015104818157851696, "epoch": 2.0746355685131195, "grad_norm": 0.14824816584587097, "learning_rate": 1e-06, "loss": -0.007, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030412946428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 705.4707641601562, "completions/mean_terminated_length": 599.1202392578125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.0839650145772595, "grad_norm": 0.12516848742961884, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 130896080.0, "reward": 0.6065848469734192, "reward_std": 0.17559024691581726, "rewards/simpleverify_reward/mean": 0.6065848469734192, "rewards/simpleverify_reward/std": 0.4885757565498352, "step": 217 }, { "clip_ratio/high_max": 0.0016527549087186344, "clip_ratio/high_mean": 0.0007704099480179138, "clip_ratio/low_mean": 0.0005447022658700007, "clip_ratio/low_min": 2.095397485391004e-05, "clip_ratio/region_mean": 0.001315112218435388, "epoch": 2.0932944606413995, "grad_norm": 0.13521195948123932, "learning_rate": 1e-06, "loss": -0.0098, "step": 218 }, { "clip_ratio/high_max": 0.0016879313552635722, "clip_ratio/high_mean": 0.0006502251071651699, "clip_ratio/low_mean": 0.0005313638930601883, "clip_ratio/low_min": 1.3845812645740807e-05, "clip_ratio/region_mean": 0.0011815889956778847, "epoch": 2.1026239067055394, "grad_norm": 0.12559908628463745, "learning_rate": 1e-06, "loss": 0.0249, "step": 219 }, { "clip_ratio/high_max": 0.0016857977243489586, "clip_ratio/high_mean": 0.0006662545238214079, "clip_ratio/low_mean": 0.0006408788494809414, "clip_ratio/low_min": 5.082922325527761e-05, "clip_ratio/region_mean": 0.001307133337832056, "epoch": 2.1119533527696794, "grad_norm": 0.12447912245988846, "learning_rate": 1e-06, "loss": -0.0209, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0326450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 710.2179565429688, "completions/mean_terminated_length": 595.958740234375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 2.1212827988338194, "grad_norm": 0.11906172335147858, "learning_rate": 1e-06, "loss": 0.0198, "num_tokens": 133313957.0, "reward": 0.6141183376312256, "reward_std": 0.1725366711616516, "rewards/simpleverify_reward/mean": 0.6141182780265808, "rewards/simpleverify_reward/std": 0.48687073588371277, "step": 221 }, { "clip_ratio/high_max": 0.0017351251881336793, "clip_ratio/high_mean": 0.0007155742532631848, "clip_ratio/low_mean": 0.000526511616044445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012420859056874178, "epoch": 2.130612244897959, "grad_norm": 0.13691619038581848, "learning_rate": 1e-06, "loss": -0.0206, "step": 222 }, { "clip_ratio/high_max": 0.001887709047878161, "clip_ratio/high_mean": 0.0006971393413550686, "clip_ratio/low_mean": 0.00048776711719256127, "clip_ratio/low_min": 1.6288768165395595e-05, "clip_ratio/region_mean": 0.0011849064721900504, "epoch": 2.139941690962099, "grad_norm": 0.14392124116420746, "learning_rate": 1e-06, "loss": 0.0047, "step": 223 }, { "clip_ratio/high_max": 0.0018709252653934527, "clip_ratio/high_mean": 0.0007758366900816327, "clip_ratio/low_mean": 0.0005251107568255975, "clip_ratio/low_min": 9.917486750055104e-06, "clip_ratio/region_mean": 0.001300947453273693, "epoch": 2.149271137026239, "grad_norm": 0.13197143375873566, "learning_rate": 1e-06, "loss": -0.0072, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0306919642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 706.8287353515625, "completions/mean_terminated_length": 599.5147094726562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.158600583090379, "grad_norm": 0.1391644924879074, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 135775311.0, "reward": 0.5641741156578064, "reward_std": 0.1787848174571991, "rewards/simpleverify_reward/mean": 0.5641741156578064, "rewards/simpleverify_reward/std": 0.49593377113342285, "step": 225 }, { "clip_ratio/high_max": 0.001497692006523721, "clip_ratio/high_mean": 0.0005408432357398851, "clip_ratio/low_mean": 0.0005517146546480944, "clip_ratio/low_min": 1.0217427188763395e-05, "clip_ratio/region_mean": 0.0010925578899332322, "epoch": 2.167930029154519, "grad_norm": 0.13067105412483215, "learning_rate": 1e-06, "loss": 0.0401, "step": 226 }, { "clip_ratio/high_max": 0.0016252570349024609, "clip_ratio/high_mean": 0.0006950455917831277, "clip_ratio/low_mean": 0.0005974833820800995, "clip_ratio/low_min": 1.5830801203264855e-05, "clip_ratio/region_mean": 0.0012925289884151425, "epoch": 2.177259475218659, "grad_norm": 0.13222157955169678, "learning_rate": 1e-06, "loss": -0.009, "step": 227 }, { "clip_ratio/high_max": 0.001957140524609713, "clip_ratio/high_mean": 0.0007622112570970785, "clip_ratio/low_mean": 0.0006471719652836327, "clip_ratio/low_min": 3.56810296580079e-05, "clip_ratio/region_mean": 0.0014093832360231318, "epoch": 2.186588921282799, "grad_norm": 0.13028211891651154, "learning_rate": 1e-06, "loss": 0.0205, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 651.9213256835938, "completions/mean_terminated_length": 561.1838989257812, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.195918367346939, "grad_norm": 0.1332274228334427, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 138076837.0, "reward": 0.599609375, "reward_std": 0.1792817860841751, "rewards/simpleverify_reward/mean": 0.599609375, "rewards/simpleverify_reward/std": 0.4900458753108978, "step": 229 }, { "clip_ratio/high_max": 0.0019169309707649518, "clip_ratio/high_mean": 0.0006868549589853501, "clip_ratio/low_mean": 0.0006439608987420797, "clip_ratio/low_min": 4.774453191203065e-05, "clip_ratio/region_mean": 0.0013308158595464192, "epoch": 2.205247813411079, "grad_norm": 0.13651886582374573, "learning_rate": 1e-06, "loss": 0.0398, "step": 230 }, { "clip_ratio/high_max": 0.0019190955499652773, "clip_ratio/high_mean": 0.0007637269536644453, "clip_ratio/low_mean": 0.0005183395019230375, "clip_ratio/low_min": 2.524416322557954e-05, "clip_ratio/region_mean": 0.0012820664633181877, "epoch": 2.2145772594752184, "grad_norm": 0.14699022471904755, "learning_rate": 1e-06, "loss": -0.065, "step": 231 }, { "clip_ratio/high_max": 0.0018027436744887382, "clip_ratio/high_mean": 0.0007715809297224041, "clip_ratio/low_mean": 0.0005592743509623688, "clip_ratio/low_min": 1.8751874449662864e-05, "clip_ratio/region_mean": 0.0013308552661328577, "epoch": 2.2239067055393584, "grad_norm": 0.1339794397354126, "learning_rate": 1e-06, "loss": 0.0146, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0326450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 694.6426391601562, "completions/mean_terminated_length": 579.8577880859375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.2332361516034984, "grad_norm": 0.140318363904953, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 140421988.0, "reward": 0.6484375, "reward_std": 0.16836698353290558, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4775247871875763, "step": 233 }, { "clip_ratio/high_max": 0.001967182852240512, "clip_ratio/high_mean": 0.000825451736091054, "clip_ratio/low_mean": 0.0003838607522084203, "clip_ratio/low_min": 3.795394331973512e-05, "clip_ratio/region_mean": 0.0012093124751118012, "epoch": 2.2425655976676384, "grad_norm": 0.13928529620170593, "learning_rate": 1e-06, "loss": 0.0227, "step": 234 }, { "clip_ratio/high_max": 0.0020862702949671075, "clip_ratio/high_mean": 0.0007130650446924847, "clip_ratio/low_mean": 0.0005034266805523657, "clip_ratio/low_min": 2.0791749193449505e-05, "clip_ratio/region_mean": 0.0012164917025074828, "epoch": 2.2518950437317784, "grad_norm": 0.16520488262176514, "learning_rate": 1e-06, "loss": 0.0075, "step": 235 }, { "clip_ratio/high_max": 0.0018216893477074336, "clip_ratio/high_mean": 0.0007605003584103542, "clip_ratio/low_mean": 0.0005159388401807519, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012764391758537386, "epoch": 2.2612244897959184, "grad_norm": 0.12037136405706406, "learning_rate": 1e-06, "loss": 0.0151, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.031529017857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 685.5625610351562, "completions/mean_terminated_length": 574.5341186523438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.2705539358600584, "grad_norm": 0.1411719024181366, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 142755164.0, "reward": 0.591796875, "reward_std": 0.17373596131801605, "rewards/simpleverify_reward/mean": 0.591796875, "rewards/simpleverify_reward/std": 0.4915696680545807, "step": 237 }, { "clip_ratio/high_max": 0.0019108576379949227, "clip_ratio/high_mean": 0.000685631726810243, "clip_ratio/low_mean": 0.0006459067635660176, "clip_ratio/low_min": 5.2119856263743713e-05, "clip_ratio/region_mean": 0.0013315384821908083, "epoch": 2.2798833819241984, "grad_norm": 0.12460934370756149, "learning_rate": 1e-06, "loss": 0.0208, "step": 238 }, { "clip_ratio/high_max": 0.0016393245205108542, "clip_ratio/high_mean": 0.0006713488855893956, "clip_ratio/low_mean": 0.000597772532273666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012691214215010405, "epoch": 2.2892128279883384, "grad_norm": 0.1892368346452713, "learning_rate": 1e-06, "loss": -0.0063, "step": 239 }, { "clip_ratio/high_max": 0.0015967985746101476, "clip_ratio/high_mean": 0.0006054456353012938, "clip_ratio/low_mean": 0.0005845423447681242, "clip_ratio/low_min": 2.2170983356772922e-05, "clip_ratio/region_mean": 0.0011899879609700292, "epoch": 2.298542274052478, "grad_norm": 0.13367941975593567, "learning_rate": 1e-06, "loss": -0.0034, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0320870535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3799.0, "completions/mean_length": 699.443115234375, "completions/mean_terminated_length": 586.8446044921875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 2.307871720116618, "grad_norm": 0.14915698766708374, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 145151336.0, "reward": 0.606026828289032, "reward_std": 0.17665758728981018, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48869720101356506, "step": 241 }, { "clip_ratio/high_max": 0.0016891596969799139, "clip_ratio/high_mean": 0.0006504538796434645, "clip_ratio/low_mean": 0.000522457858096459, "clip_ratio/low_min": 9.56681469688192e-06, "clip_ratio/region_mean": 0.0011729117431968916, "epoch": 2.317201166180758, "grad_norm": 0.13041719794273376, "learning_rate": 1e-06, "loss": 0.015, "step": 242 }, { "clip_ratio/high_max": 0.0020396548134158365, "clip_ratio/high_mean": 0.0007865351199143333, "clip_ratio/low_mean": 0.0005414672568804235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013280023777042516, "epoch": 2.326530612244898, "grad_norm": 0.13124321401119232, "learning_rate": 1e-06, "loss": 0.0115, "step": 243 }, { "clip_ratio/high_max": 0.0017679176489764359, "clip_ratio/high_mean": 0.0007808800146449357, "clip_ratio/low_mean": 0.00048723934196459595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012681193475145847, "epoch": 2.335860058309038, "grad_norm": 0.13256074488162994, "learning_rate": 1e-06, "loss": -0.0364, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.037388392857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 713.2826538085938, "completions/mean_terminated_length": 581.8959350585938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.345189504373178, "grad_norm": 0.13806745409965515, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 147503637.0, "reward": 0.5970982313156128, "reward_std": 0.1897384375333786, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.490549772977829, "step": 245 }, { "clip_ratio/high_max": 0.0019273000762041193, "clip_ratio/high_mean": 0.0007915619462437462, "clip_ratio/low_mean": 0.0005102166128381214, "clip_ratio/low_min": 1.332338524662191e-05, "clip_ratio/region_mean": 0.0013017785604461096, "epoch": 2.354518950437318, "grad_norm": 0.14389874041080475, "learning_rate": 1e-06, "loss": -0.0011, "step": 246 }, { "clip_ratio/high_max": 0.002268379375891527, "clip_ratio/high_mean": 0.0008088488284556661, "clip_ratio/low_mean": 0.0006946353623789037, "clip_ratio/low_min": 3.291415669082198e-05, "clip_ratio/region_mean": 0.0015034841853776015, "epoch": 2.363848396501458, "grad_norm": 0.14421573281288147, "learning_rate": 1e-06, "loss": 0.0017, "step": 247 }, { "clip_ratio/high_max": 0.0018638305773492903, "clip_ratio/high_mean": 0.0007484372654289473, "clip_ratio/low_mean": 0.0005733895777666476, "clip_ratio/low_min": 1.201923078042455e-05, "clip_ratio/region_mean": 0.0013218268431955948, "epoch": 2.373177842565598, "grad_norm": 0.1331528127193451, "learning_rate": 1e-06, "loss": -0.0222, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029575892857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 687.2070922851562, "completions/mean_terminated_length": 583.3162841796875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.3825072886297374, "grad_norm": 0.13363593816757202, "learning_rate": 1e-06, "loss": 0.0266, "num_tokens": 149884555.0, "reward": 0.6163504719734192, "reward_std": 0.1701686829328537, "rewards/simpleverify_reward/mean": 0.6163504719734192, "rewards/simpleverify_reward/std": 0.48634201288223267, "step": 249 }, { "clip_ratio/high_max": 0.0018454498294886434, "clip_ratio/high_mean": 0.000715472747288004, "clip_ratio/low_mean": 0.00045781824837831664, "clip_ratio/low_min": 1.2259709365025628e-05, "clip_ratio/region_mean": 0.0011732909952115733, "epoch": 2.3918367346938774, "grad_norm": 0.12076021730899811, "learning_rate": 1e-06, "loss": -0.0191, "step": 250 }, { "clip_ratio/high_max": 0.0018065480144286994, "clip_ratio/high_mean": 0.0006820464641350554, "clip_ratio/low_mean": 0.0004961704846664361, "clip_ratio/low_min": 1.4497796655632555e-05, "clip_ratio/region_mean": 0.0011782169367506867, "epoch": 2.4011661807580174, "grad_norm": 0.1459232121706009, "learning_rate": 1e-06, "loss": -0.006, "step": 251 }, { "clip_ratio/high_max": 0.0019437461596680805, "clip_ratio/high_mean": 0.0008087216647254536, "clip_ratio/low_mean": 0.0005041611780143285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013128828395565506, "epoch": 2.4104956268221573, "grad_norm": 0.13260768353939056, "learning_rate": 1e-06, "loss": -0.0332, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 707.5767822265625, "completions/mean_terminated_length": 582.07958984375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.4198250728862973, "grad_norm": 0.13935868442058563, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 152263510.0, "reward": 0.5920759439468384, "reward_std": 0.1797669678926468, "rewards/simpleverify_reward/mean": 0.5920758843421936, "rewards/simpleverify_reward/std": 0.49151748418807983, "step": 253 }, { "clip_ratio/high_max": 0.0018102641697623767, "clip_ratio/high_mean": 0.0007259311423695181, "clip_ratio/low_mean": 0.00043456197636260185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001160493105999194, "epoch": 2.4291545189504373, "grad_norm": 0.3038552403450012, "learning_rate": 1e-06, "loss": -0.0044, "step": 254 }, { "clip_ratio/high_max": 0.0016059820954978932, "clip_ratio/high_mean": 0.0006561253667314304, "clip_ratio/low_mean": 0.0005742549419665011, "clip_ratio/low_min": 1.1180679393874016e-05, "clip_ratio/region_mean": 0.0012303803268878255, "epoch": 2.4384839650145773, "grad_norm": 0.1411878764629364, "learning_rate": 1e-06, "loss": -0.0013, "step": 255 }, { "clip_ratio/high_max": 0.0014641743618994951, "clip_ratio/high_mean": 0.0006142590464150999, "clip_ratio/low_mean": 0.0006670611223853484, "clip_ratio/low_min": 7.736728275631322e-05, "clip_ratio/region_mean": 0.0012813201319659129, "epoch": 2.4478134110787173, "grad_norm": 0.13271407783031464, "learning_rate": 1e-06, "loss": 0.0397, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 700.7960815429688, "completions/mean_terminated_length": 575.0477294921875, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 2.4571428571428573, "grad_norm": 0.1505352407693863, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 154602003.0, "reward": 0.6143973469734192, "reward_std": 0.17766490578651428, "rewards/simpleverify_reward/mean": 0.6143973469734192, "rewards/simpleverify_reward/std": 0.486805260181427, "step": 257 }, { "clip_ratio/high_max": 0.001881261636299314, "clip_ratio/high_mean": 0.0006120816160546383, "clip_ratio/low_mean": 0.0005930551633355208, "clip_ratio/low_min": 2.952686099888524e-05, "clip_ratio/region_mean": 0.0012051367593812756, "epoch": 2.466472303206997, "grad_norm": 0.13187003135681152, "learning_rate": 1e-06, "loss": 0.025, "step": 258 }, { "clip_ratio/high_max": 0.00179633514926536, "clip_ratio/high_mean": 0.0007189757325249957, "clip_ratio/low_mean": 0.0004956189868607908, "clip_ratio/low_min": 3.280839882791042e-05, "clip_ratio/region_mean": 0.0012145947184762917, "epoch": 2.4758017492711373, "grad_norm": 0.13737772405147552, "learning_rate": 1e-06, "loss": -0.0234, "step": 259 }, { "clip_ratio/high_max": 0.002048524111160077, "clip_ratio/high_mean": 0.0008120604215946514, "clip_ratio/low_mean": 0.0006704306299525342, "clip_ratio/low_min": 5.973153565719258e-05, "clip_ratio/region_mean": 0.0014824910867901053, "epoch": 2.485131195335277, "grad_norm": 0.13670259714126587, "learning_rate": 1e-06, "loss": -0.0054, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 717.0047607421875, "completions/mean_terminated_length": 603.9818115234375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 2.494460641399417, "grad_norm": 0.13709160685539246, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 157060412.0, "reward": 0.6077009439468384, "reward_std": 0.17427226901054382, "rewards/simpleverify_reward/mean": 0.6077008843421936, "rewards/simpleverify_reward/std": 0.4883309006690979, "step": 261 }, { "clip_ratio/high_max": 0.0016090473436634056, "clip_ratio/high_mean": 0.0005979425932309823, "clip_ratio/low_mean": 0.0006238093665160704, "clip_ratio/low_min": 1.4799905329709873e-05, "clip_ratio/region_mean": 0.0012217519724799786, "epoch": 2.503790087463557, "grad_norm": 0.12443902343511581, "learning_rate": 1e-06, "loss": 0.0072, "step": 262 }, { "clip_ratio/high_max": 0.0016634313069516793, "clip_ratio/high_mean": 0.0007026230123301502, "clip_ratio/low_mean": 0.0005625616377074039, "clip_ratio/low_min": 2.1605885194730945e-05, "clip_ratio/region_mean": 0.0012651846518565435, "epoch": 2.513119533527697, "grad_norm": 0.12987031042575836, "learning_rate": 1e-06, "loss": 0.0046, "step": 263 }, { "clip_ratio/high_max": 0.001642533425183501, "clip_ratio/high_mean": 0.0007387746200038237, "clip_ratio/low_mean": 0.0005474968506860023, "clip_ratio/low_min": 1.0654619472916238e-05, "clip_ratio/region_mean": 0.0012862714647781104, "epoch": 2.522448979591837, "grad_norm": 0.13111330568790436, "learning_rate": 1e-06, "loss": -0.0644, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 756.5293579101562, "completions/mean_terminated_length": 628.832275390625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 2.5317784256559768, "grad_norm": 0.13569842278957367, "learning_rate": 1e-06, "loss": -0.0345, "num_tokens": 159604173.0, "reward": 0.5792410969734192, "reward_std": 0.1794581562280655, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49374979734420776, "step": 265 }, { "clip_ratio/high_max": 0.0015425263227371033, "clip_ratio/high_mean": 0.0006109362202550983, "clip_ratio/low_mean": 0.000575659544665541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001186595756735187, "epoch": 2.5411078717201168, "grad_norm": 0.140055850148201, "learning_rate": 1e-06, "loss": 0.0346, "step": 266 }, { "clip_ratio/high_max": 0.0020319424838817213, "clip_ratio/high_mean": 0.0007604754755448084, "clip_ratio/low_mean": 0.0005861783329237369, "clip_ratio/low_min": 1.2977574442629702e-05, "clip_ratio/region_mean": 0.0013466538221109658, "epoch": 2.5504373177842563, "grad_norm": 0.14293383061885834, "learning_rate": 1e-06, "loss": -0.0009, "step": 267 }, { "clip_ratio/high_max": 0.0018645824020495638, "clip_ratio/high_mean": 0.0007235816374304704, "clip_ratio/low_mean": 0.0005217122379690409, "clip_ratio/low_min": 1.305619389313506e-05, "clip_ratio/region_mean": 0.0012452938790374901, "epoch": 2.5597667638483967, "grad_norm": 0.12288478761911392, "learning_rate": 1e-06, "loss": -0.0024, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 738.6724853515625, "completions/mean_terminated_length": 594.0610961914062, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.5690962099125363, "grad_norm": 0.14716821908950806, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 162009247.0, "reward": 0.582589328289032, "reward_std": 0.17836789786815643, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4932006299495697, "step": 269 }, { "clip_ratio/high_max": 0.0016824276935949456, "clip_ratio/high_mean": 0.0006743534158886177, "clip_ratio/low_mean": 0.000561476893381041, "clip_ratio/low_min": 2.0370686797832604e-05, "clip_ratio/region_mean": 0.0012358303247310687, "epoch": 2.5784256559766763, "grad_norm": 0.14416323602199554, "learning_rate": 1e-06, "loss": 0.0051, "step": 270 }, { "clip_ratio/high_max": 0.0022601371820201166, "clip_ratio/high_mean": 0.000789731997429044, "clip_ratio/low_mean": 0.0005636896976284334, "clip_ratio/low_min": 2.54065034823725e-05, "clip_ratio/region_mean": 0.0013534217287087813, "epoch": 2.5877551020408163, "grad_norm": 0.14307400584220886, "learning_rate": 1e-06, "loss": -0.011, "step": 271 }, { "clip_ratio/high_max": 0.0018031828913080972, "clip_ratio/high_mean": 0.0006963682262721704, "clip_ratio/low_mean": 0.0005895868640664048, "clip_ratio/low_min": 1.0757315067166928e-05, "clip_ratio/region_mean": 0.0012859550770372152, "epoch": 2.5970845481049563, "grad_norm": 0.14521323144435883, "learning_rate": 1e-06, "loss": 0.0105, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 689.6512451171875, "completions/mean_terminated_length": 559.3968505859375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.6064139941690962, "grad_norm": 0.1569003015756607, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 164277277.0, "reward": 0.6428571939468384, "reward_std": 0.18134817481040955, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4792242646217346, "step": 273 }, { "clip_ratio/high_max": 0.0016547233281016815, "clip_ratio/high_mean": 0.0006964638869249029, "clip_ratio/low_mean": 0.0005364117569115479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012328756456554402, "epoch": 2.6157434402332362, "grad_norm": 0.1560385674238205, "learning_rate": 1e-06, "loss": -0.0175, "step": 274 }, { "clip_ratio/high_max": 0.0019389915505598765, "clip_ratio/high_mean": 0.0008736599338590167, "clip_ratio/low_mean": 0.0005177711718715727, "clip_ratio/low_min": 3.646842833404662e-05, "clip_ratio/region_mean": 0.001391431113006547, "epoch": 2.6250728862973762, "grad_norm": 0.13099277019500732, "learning_rate": 1e-06, "loss": -0.0222, "step": 275 }, { "clip_ratio/high_max": 0.0017126097081927583, "clip_ratio/high_mean": 0.0007777880236972123, "clip_ratio/low_mean": 0.000543027943422203, "clip_ratio/low_min": 2.915318418672541e-05, "clip_ratio/region_mean": 0.0013208159871282987, "epoch": 2.6344023323615158, "grad_norm": 0.13601262867450714, "learning_rate": 1e-06, "loss": -0.0039, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 730.8733520507812, "completions/mean_terminated_length": 586.9473266601562, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 2.643731778425656, "grad_norm": 0.13918153941631317, "learning_rate": 1e-06, "loss": 0.0257, "num_tokens": 166648911.0, "reward": 0.6043527126312256, "reward_std": 0.17157739400863647, "rewards/simpleverify_reward/mean": 0.6043526530265808, "rewards/simpleverify_reward/std": 0.4890575110912323, "step": 277 }, { "clip_ratio/high_max": 0.001886399113573134, "clip_ratio/high_mean": 0.0007176992694439832, "clip_ratio/low_mean": 0.0004927544605379808, "clip_ratio/low_min": 1.279688785871258e-05, "clip_ratio/region_mean": 0.0012104537272534799, "epoch": 2.6530612244897958, "grad_norm": 0.13324692845344543, "learning_rate": 1e-06, "loss": -0.0008, "step": 278 }, { "clip_ratio/high_max": 0.0020507166773313656, "clip_ratio/high_mean": 0.0007854408313505701, "clip_ratio/low_mean": 0.0004666435485205511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00125208437020774, "epoch": 2.6623906705539357, "grad_norm": 0.14460821449756622, "learning_rate": 1e-06, "loss": -0.0188, "step": 279 }, { "clip_ratio/high_max": 0.0017408446219633333, "clip_ratio/high_mean": 0.0007421633872581879, "clip_ratio/low_mean": 0.0005106907028675778, "clip_ratio/low_min": 1.306438116444042e-05, "clip_ratio/region_mean": 0.001252854064659914, "epoch": 2.6717201166180757, "grad_norm": 0.13736505806446075, "learning_rate": 1e-06, "loss": -0.0371, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.034877232142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 725.0527954101562, "completions/mean_terminated_length": 603.2347412109375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 2.6810495626822157, "grad_norm": 0.1457400768995285, "learning_rate": 1e-06, "loss": 0.0212, "num_tokens": 169086700.0, "reward": 0.5758928656578064, "reward_std": 0.18104737997055054, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49427565932273865, "step": 281 }, { "clip_ratio/high_max": 0.0013603413899545558, "clip_ratio/high_mean": 0.0005887462143618905, "clip_ratio/low_mean": 0.0006238162332010688, "clip_ratio/low_min": 2.4967576791823376e-05, "clip_ratio/region_mean": 0.001212562434375286, "epoch": 2.6903790087463557, "grad_norm": 0.14514592289924622, "learning_rate": 1e-06, "loss": 0.0085, "step": 282 }, { "clip_ratio/high_max": 0.0013932319161540363, "clip_ratio/high_mean": 0.0005945542961853789, "clip_ratio/low_mean": 0.000566529492061818, "clip_ratio/low_min": 3.9096401451388374e-05, "clip_ratio/region_mean": 0.0011610837864282075, "epoch": 2.6997084548104957, "grad_norm": 0.13132494688034058, "learning_rate": 1e-06, "loss": -0.0465, "step": 283 }, { "clip_ratio/high_max": 0.0016123675304697827, "clip_ratio/high_mean": 0.0006615416750719305, "clip_ratio/low_mean": 0.0006013903821440181, "clip_ratio/low_min": 2.8901733458042145e-05, "clip_ratio/region_mean": 0.00126293202265515, "epoch": 2.7090379008746357, "grad_norm": 0.15348774194717407, "learning_rate": 1e-06, "loss": -0.013, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 741.7824096679688, "completions/mean_terminated_length": 601.3732299804688, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.7183673469387752, "grad_norm": 0.16055156290531158, "learning_rate": 1e-06, "loss": 0.0127, "num_tokens": 171539880.0, "reward": 0.5873326063156128, "reward_std": 0.17814555764198303, "rewards/simpleverify_reward/mean": 0.5873326063156128, "rewards/simpleverify_reward/std": 0.49238264560699463, "step": 285 }, { "clip_ratio/high_max": 0.0015037911871331744, "clip_ratio/high_mean": 0.000601670970354462, "clip_ratio/low_mean": 0.0006212438565853518, "clip_ratio/low_min": 4.40897056250833e-05, "clip_ratio/region_mean": 0.0012229148123878986, "epoch": 2.7276967930029157, "grad_norm": 0.1396874189376831, "learning_rate": 1e-06, "loss": -0.0047, "step": 286 }, { "clip_ratio/high_max": 0.0019136914670525584, "clip_ratio/high_mean": 0.0007204629146144725, "clip_ratio/low_mean": 0.0005796293235107441, "clip_ratio/low_min": 3.0060630706429947e-05, "clip_ratio/region_mean": 0.0013000922444916796, "epoch": 2.7370262390670552, "grad_norm": 0.14242860674858093, "learning_rate": 1e-06, "loss": -0.0307, "step": 287 }, { "clip_ratio/high_max": 0.0017625787077122368, "clip_ratio/high_mean": 0.000634691060440673, "clip_ratio/low_mean": 0.0005519612486750702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011866523163917009, "epoch": 2.746355685131195, "grad_norm": 0.15416951477527618, "learning_rate": 1e-06, "loss": 0.0172, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 707.8111572265625, "completions/mean_terminated_length": 591.4490356445312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.755685131195335, "grad_norm": 0.15658265352249146, "learning_rate": 1e-06, "loss": 0.0477, "num_tokens": 173944395.0, "reward": 0.6010044813156128, "reward_std": 0.17029376327991486, "rewards/simpleverify_reward/mean": 0.6010044813156128, "rewards/simpleverify_reward/std": 0.489760160446167, "step": 289 }, { "clip_ratio/high_max": 0.002186075482313754, "clip_ratio/high_mean": 0.0007622513821843313, "clip_ratio/low_mean": 0.00046751005720579997, "clip_ratio/low_min": 1.094954404834425e-05, "clip_ratio/region_mean": 0.0012297614375711419, "epoch": 2.765014577259475, "grad_norm": 0.13978497684001923, "learning_rate": 1e-06, "loss": -0.0656, "step": 290 }, { "clip_ratio/high_max": 0.002054946766293142, "clip_ratio/high_mean": 0.0007673097970837262, "clip_ratio/low_mean": 0.00047918118980305735, "clip_ratio/low_min": 4.786132194567472e-05, "clip_ratio/region_mean": 0.0012464909959817305, "epoch": 2.774344023323615, "grad_norm": 0.1288522481918335, "learning_rate": 1e-06, "loss": -0.0219, "step": 291 }, { "clip_ratio/high_max": 0.0013565543049480766, "clip_ratio/high_mean": 0.00066883582076116, "clip_ratio/low_mean": 0.000570540191802138, "clip_ratio/low_min": 3.435014150454663e-05, "clip_ratio/region_mean": 0.0012393760225677397, "epoch": 2.783673469387755, "grad_norm": 0.13468307256698608, "learning_rate": 1e-06, "loss": 0.0457, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0415736607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 753.4852294921875, "completions/mean_terminated_length": 608.4969482421875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.793002915451895, "grad_norm": 0.15004877746105194, "learning_rate": 1e-06, "loss": -0.0388, "num_tokens": 176370710.0, "reward": 0.591796875, "reward_std": 0.18566939234733582, "rewards/simpleverify_reward/mean": 0.591796875, "rewards/simpleverify_reward/std": 0.4915696680545807, "step": 293 }, { "clip_ratio/high_max": 0.0016986278242256958, "clip_ratio/high_mean": 0.0006904115471115801, "clip_ratio/low_mean": 0.000563119891921815, "clip_ratio/low_min": 1.093230730475625e-05, "clip_ratio/region_mean": 0.001253531463589752, "epoch": 2.8023323615160347, "grad_norm": 0.1370498687028885, "learning_rate": 1e-06, "loss": 0.0209, "step": 294 }, { "clip_ratio/high_max": 0.0017593850498087704, "clip_ratio/high_mean": 0.0007272644738804956, "clip_ratio/low_mean": 0.0005847599231856293, "clip_ratio/low_min": 5.377184788812883e-05, "clip_ratio/region_mean": 0.0013120243838784518, "epoch": 2.811661807580175, "grad_norm": 0.15669550001621246, "learning_rate": 1e-06, "loss": 0.0058, "step": 295 }, { "clip_ratio/high_max": 0.0017676597999525256, "clip_ratio/high_mean": 0.0007460701190211694, "clip_ratio/low_mean": 0.0005913846553085023, "clip_ratio/low_min": 7.921322958281962e-05, "clip_ratio/region_mean": 0.0013374547706916928, "epoch": 2.8209912536443147, "grad_norm": 0.13304726779460907, "learning_rate": 1e-06, "loss": -0.0142, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.040736607142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 743.8406982421875, "completions/mean_terminated_length": 601.486083984375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 2.8303206997084547, "grad_norm": 0.16311214864253998, "learning_rate": 1e-06, "loss": 0.0144, "num_tokens": 178801907.0, "reward": 0.5954241156578064, "reward_std": 0.1863442212343216, "rewards/simpleverify_reward/mean": 0.5954241156578064, "rewards/simpleverify_reward/std": 0.49087825417518616, "step": 297 }, { "clip_ratio/high_max": 0.001887887337943539, "clip_ratio/high_mean": 0.0007304969331016764, "clip_ratio/low_mean": 0.0006368638569256291, "clip_ratio/low_min": 6.011683763063047e-05, "clip_ratio/region_mean": 0.0013673607863893267, "epoch": 2.8396501457725947, "grad_norm": 0.13964280486106873, "learning_rate": 1e-06, "loss": -0.0118, "step": 298 }, { "clip_ratio/high_max": 0.0016027627170842607, "clip_ratio/high_mean": 0.0006373675878421636, "clip_ratio/low_mean": 0.0005979023208055878, "clip_ratio/low_min": 1.707650335447397e-05, "clip_ratio/region_mean": 0.0012352699013717938, "epoch": 2.8489795918367347, "grad_norm": 0.15158233046531677, "learning_rate": 1e-06, "loss": -0.0144, "step": 299 }, { "clip_ratio/high_max": 0.002065361288259737, "clip_ratio/high_mean": 0.0007712027691013645, "clip_ratio/low_mean": 0.0006289898492468637, "clip_ratio/low_min": 3.9644783100811765e-05, "clip_ratio/region_mean": 0.0014001926392666064, "epoch": 2.8583090379008746, "grad_norm": 0.14101241528987885, "learning_rate": 1e-06, "loss": -0.0198, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0398995535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 726.6945190429688, "completions/mean_terminated_length": 586.6738891601562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.8676384839650146, "grad_norm": 0.15890933573246002, "learning_rate": 1e-06, "loss": 0.0298, "num_tokens": 181163492.0, "reward": 0.5778459906578064, "reward_std": 0.18270714581012726, "rewards/simpleverify_reward/mean": 0.5778459906578064, "rewards/simpleverify_reward/std": 0.4939717650413513, "step": 301 }, { "clip_ratio/high_max": 0.0020752185737364925, "clip_ratio/high_mean": 0.0007886066669016145, "clip_ratio/low_mean": 0.0005571836936724139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013457903914968483, "epoch": 2.8769679300291546, "grad_norm": 0.16060423851013184, "learning_rate": 1e-06, "loss": -0.02, "step": 302 }, { "clip_ratio/high_max": 0.001889427556307055, "clip_ratio/high_mean": 0.0007759171785437502, "clip_ratio/low_mean": 0.0005452087325465982, "clip_ratio/low_min": 3.067931811528979e-05, "clip_ratio/region_mean": 0.001321125902904896, "epoch": 2.8862973760932946, "grad_norm": 0.14792205393314362, "learning_rate": 1e-06, "loss": -0.018, "step": 303 }, { "clip_ratio/high_max": 0.002050553182925796, "clip_ratio/high_mean": 0.0007479659725504462, "clip_ratio/low_mean": 0.0006965736811253009, "clip_ratio/low_min": 1.1349192391207907e-05, "clip_ratio/region_mean": 0.0014445396591327153, "epoch": 2.8956268221574346, "grad_norm": 0.1585221290588379, "learning_rate": 1e-06, "loss": -0.0242, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0382254464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 727.187255859375, "completions/mean_terminated_length": 593.2947387695312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.904956268221574, "grad_norm": 0.14865650236606598, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 183581235.0, "reward": 0.5973772406578064, "reward_std": 0.17145460844039917, "rewards/simpleverify_reward/mean": 0.5973772406578064, "rewards/simpleverify_reward/std": 0.4904944598674774, "step": 305 }, { "clip_ratio/high_max": 0.0015308694964915048, "clip_ratio/high_mean": 0.0005813085226691328, "clip_ratio/low_mean": 0.0005329532923497027, "clip_ratio/low_min": 1.5303623513318598e-05, "clip_ratio/region_mean": 0.0011142617877339944, "epoch": 2.914285714285714, "grad_norm": 2.1346042156219482, "learning_rate": 1e-06, "loss": -0.0236, "step": 306 }, { "clip_ratio/high_max": 0.0017693491936370265, "clip_ratio/high_mean": 0.0006461647108153556, "clip_ratio/low_mean": 0.0004714654824056197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011176301886735018, "epoch": 2.923615160349854, "grad_norm": 0.1277737021446228, "learning_rate": 1e-06, "loss": -0.022, "step": 307 }, { "clip_ratio/high_max": 0.0020426519458851544, "clip_ratio/high_mean": 0.0007500581523345318, "clip_ratio/low_mean": 0.0006324270270852139, "clip_ratio/low_min": 8.594609425927047e-06, "clip_ratio/region_mean": 0.0013824851375829894, "epoch": 2.932944606413994, "grad_norm": 0.15168261528015137, "learning_rate": 1e-06, "loss": -0.0178, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0385044642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3855.0, "completions/mean_length": 715.1574096679688, "completions/mean_terminated_length": 579.7666625976562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.942274052478134, "grad_norm": 0.14367346465587616, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 185945359.0, "reward": 0.6029576063156128, "reward_std": 0.17867082357406616, "rewards/simpleverify_reward/mean": 0.6029576063156128, "rewards/simpleverify_reward/std": 0.4893531799316406, "step": 309 }, { "clip_ratio/high_max": 0.0019240038091083989, "clip_ratio/high_mean": 0.0007275647567439592, "clip_ratio/low_mean": 0.0005275383027765201, "clip_ratio/low_min": 2.7385084649722558e-05, "clip_ratio/region_mean": 0.0012551030777103733, "epoch": 2.951603498542274, "grad_norm": 0.1746971607208252, "learning_rate": 1e-06, "loss": -0.0012, "step": 310 }, { "clip_ratio/high_max": 0.0019338634665473364, "clip_ratio/high_mean": 0.0007648557166248793, "clip_ratio/low_mean": 0.0005447740995805361, "clip_ratio/low_min": 2.2194860321178567e-05, "clip_ratio/region_mean": 0.0013096297770971432, "epoch": 2.960932944606414, "grad_norm": 0.22450833022594452, "learning_rate": 1e-06, "loss": -0.0199, "step": 311 }, { "clip_ratio/high_max": 0.0020230899172020145, "clip_ratio/high_mean": 0.000818266877104179, "clip_ratio/low_mean": 0.0006358919072226854, "clip_ratio/low_min": 4.9810027121566236e-05, "clip_ratio/region_mean": 0.0014541587624989916, "epoch": 2.970262390670554, "grad_norm": 0.14647658169269562, "learning_rate": 1e-06, "loss": 0.0046, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.042689732142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 767.1961669921875, "completions/mean_terminated_length": 618.75341796875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 3.00932944606414, "grad_norm": 0.15083080530166626, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 188421342.0, "reward": 0.5733817219734192, "reward_std": 0.17181220650672913, "rewards/simpleverify_reward/mean": 0.5733817219734192, "rewards/simpleverify_reward/std": 0.4946548044681549, "step": 313 }, { "clip_ratio/high_max": 0.0016557252711209003, "clip_ratio/high_mean": 0.0006323381803667871, "clip_ratio/low_mean": 0.0004348551728980965, "clip_ratio/low_min": 1.1699738024617545e-05, "clip_ratio/region_mean": 0.0010671933850971982, "epoch": 3.01865889212828, "grad_norm": 0.14265626668930054, "learning_rate": 1e-06, "loss": 0.0241, "step": 314 }, { "clip_ratio/high_max": 0.0018557919029262848, "clip_ratio/high_mean": 0.0007331268534471747, "clip_ratio/low_mean": 0.0005127108925080393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012458377314032987, "epoch": 3.02798833819242, "grad_norm": 0.13499289751052856, "learning_rate": 1e-06, "loss": -0.0342, "step": 315 }, { "clip_ratio/high_max": 0.0016050761732913088, "clip_ratio/high_mean": 0.0006633147968386766, "clip_ratio/low_mean": 0.000516835662892845, "clip_ratio/low_min": 1.797526601876598e-05, "clip_ratio/region_mean": 0.0011801504588220268, "epoch": 3.03731778425656, "grad_norm": 0.1391589492559433, "learning_rate": 1e-06, "loss": -0.0076, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0460379464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 737.4810791015625, "completions/mean_terminated_length": 575.3998413085938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.0466472303206995, "grad_norm": 0.1682860106229782, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 190744610.0, "reward": 0.626953125, "reward_std": 0.15900085866451263, "rewards/simpleverify_reward/mean": 0.626953125, "rewards/simpleverify_reward/std": 0.48368188738822937, "step": 317 }, { "clip_ratio/high_max": 0.0018595248220663052, "clip_ratio/high_mean": 0.0007312056277442025, "clip_ratio/low_mean": 0.0004674970368796494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001198702682813746, "epoch": 3.0559766763848395, "grad_norm": 0.144413560628891, "learning_rate": 1e-06, "loss": -0.0365, "step": 318 }, { "clip_ratio/high_max": 0.0016938992121140473, "clip_ratio/high_mean": 0.0006739841610396979, "clip_ratio/low_mean": 0.0005247096323728329, "clip_ratio/low_min": 1.1194698345207144e-05, "clip_ratio/region_mean": 0.0011986937752226368, "epoch": 3.0653061224489795, "grad_norm": 0.15740807354450226, "learning_rate": 1e-06, "loss": -0.0198, "step": 319 }, { "clip_ratio/high_max": 0.00154527689301176, "clip_ratio/high_mean": 0.0005794799581053667, "clip_ratio/low_mean": 0.0005212142827986099, "clip_ratio/low_min": 8.835170774545986e-06, "clip_ratio/region_mean": 0.0011006942477251869, "epoch": 3.0746355685131195, "grad_norm": 0.133630633354187, "learning_rate": 1e-06, "loss": 0.0161, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 700.7826538085938, "completions/mean_terminated_length": 577.0703125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 3.0839650145772595, "grad_norm": 0.14109419286251068, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 193105175.0, "reward": 0.6222098469734192, "reward_std": 0.15982523560523987, "rewards/simpleverify_reward/mean": 0.6222098469734192, "rewards/simpleverify_reward/std": 0.48490241169929504, "step": 321 }, { "clip_ratio/high_max": 0.0018405303890176583, "clip_ratio/high_mean": 0.0006939462691661902, "clip_ratio/low_mean": 0.0005695318413927453, "clip_ratio/low_min": 3.963746075896779e-05, "clip_ratio/region_mean": 0.0012634781160159037, "epoch": 3.0932944606413995, "grad_norm": 0.13713334500789642, "learning_rate": 1e-06, "loss": 0.001, "step": 322 }, { "clip_ratio/high_max": 0.0021314829027687665, "clip_ratio/high_mean": 0.0007755124943287228, "clip_ratio/low_mean": 0.0005040785172241158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012795909788110293, "epoch": 3.1026239067055394, "grad_norm": 0.14998696744441986, "learning_rate": 1e-06, "loss": -0.0208, "step": 323 }, { "clip_ratio/high_max": 0.0018332231775275432, "clip_ratio/high_mean": 0.0006353082790155895, "clip_ratio/low_mean": 0.0005059554232502705, "clip_ratio/low_min": 1.2457643606467173e-05, "clip_ratio/region_mean": 0.00114126371772727, "epoch": 3.1119533527696794, "grad_norm": 0.13439308106899261, "learning_rate": 1e-06, "loss": -0.0229, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0359933035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 707.6858520507812, "completions/mean_terminated_length": 581.1757202148438, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.1212827988338194, "grad_norm": 0.15083158016204834, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 195473153.0, "reward": 0.642578125, "reward_std": 0.16772042214870453, "rewards/simpleverify_reward/mean": 0.642578125, "rewards/simpleverify_reward/std": 0.4793073832988739, "step": 325 }, { "clip_ratio/high_max": 0.0017008618160616606, "clip_ratio/high_mean": 0.0006103743635321734, "clip_ratio/low_mean": 0.0005489015384227969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011592758746701293, "epoch": 3.130612244897959, "grad_norm": 0.14054127037525177, "learning_rate": 1e-06, "loss": 0.022, "step": 326 }, { "clip_ratio/high_max": 0.001924179683555849, "clip_ratio/high_mean": 0.0007431304366036784, "clip_ratio/low_mean": 0.0005595924121735152, "clip_ratio/low_min": 1.2021542715956457e-05, "clip_ratio/region_mean": 0.0013027228451392148, "epoch": 3.139941690962099, "grad_norm": 0.1561124622821808, "learning_rate": 1e-06, "loss": 0.0151, "step": 327 }, { "clip_ratio/high_max": 0.0019044283399125561, "clip_ratio/high_mean": 0.0007836574204702629, "clip_ratio/low_mean": 0.0006577432995982235, "clip_ratio/low_min": 4.8525423153478187e-05, "clip_ratio/region_mean": 0.0014414007127925288, "epoch": 3.149271137026239, "grad_norm": 0.14116208255290985, "learning_rate": 1e-06, "loss": -0.0539, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0482700892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3687.0, "completions/mean_length": 787.3602294921875, "completions/mean_terminated_length": 619.5517578125, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 3.158600583090379, "grad_norm": 0.1499595046043396, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 197961124.0, "reward": 0.578683078289032, "reward_std": 0.1618172824382782, "rewards/simpleverify_reward/mean": 0.5786830186843872, "rewards/simpleverify_reward/std": 0.4938390552997589, "step": 329 }, { "clip_ratio/high_max": 0.0014334487495943904, "clip_ratio/high_mean": 0.0005274310124150361, "clip_ratio/low_mean": 0.0005635964548673655, "clip_ratio/low_min": 1.9367833374417387e-05, "clip_ratio/region_mean": 0.0010910274540947285, "epoch": 3.167930029154519, "grad_norm": 0.1447145640850067, "learning_rate": 1e-06, "loss": -0.0056, "step": 330 }, { "clip_ratio/high_max": 0.0019125678409182, "clip_ratio/high_mean": 0.0007179306412581354, "clip_ratio/low_mean": 0.0005062663021817571, "clip_ratio/low_min": 5.372301166062243e-05, "clip_ratio/region_mean": 0.0012241969488968607, "epoch": 3.177259475218659, "grad_norm": 0.14563141763210297, "learning_rate": 1e-06, "loss": -0.0407, "step": 331 }, { "clip_ratio/high_max": 0.0019172787542629521, "clip_ratio/high_mean": 0.000652353250188753, "clip_ratio/low_mean": 0.000561549295525765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001213902563904412, "epoch": 3.186588921282799, "grad_norm": 0.14328908920288086, "learning_rate": 1e-06, "loss": -0.005, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0463169642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 786.4461669921875, "completions/mean_terminated_length": 625.7130126953125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.195918367346939, "grad_norm": 0.17174363136291504, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 200462731.0, "reward": 0.5881696939468384, "reward_std": 0.1739250272512436, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.49223336577415466, "step": 333 }, { "clip_ratio/high_max": 0.001682241909293225, "clip_ratio/high_mean": 0.0007225825920613715, "clip_ratio/low_mean": 0.0004522481622188934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011748307515517808, "epoch": 3.205247813411079, "grad_norm": 0.13683918118476868, "learning_rate": 1e-06, "loss": -0.0628, "step": 334 }, { "clip_ratio/high_max": 0.0015661783945688512, "clip_ratio/high_mean": 0.000632618732197443, "clip_ratio/low_mean": 0.0004672220989050402, "clip_ratio/low_min": 4.560314482660033e-05, "clip_ratio/region_mean": 0.0010998408070008736, "epoch": 3.2145772594752184, "grad_norm": 0.12931102514266968, "learning_rate": 1e-06, "loss": -0.0141, "step": 335 }, { "clip_ratio/high_max": 0.0016842367840581574, "clip_ratio/high_mean": 0.0006364850014506374, "clip_ratio/low_mean": 0.0005490359271789202, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011855209559143987, "epoch": 3.2239067055393584, "grad_norm": 0.15676546096801758, "learning_rate": 1e-06, "loss": 0.0246, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0438058035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 751.8731079101562, "completions/mean_terminated_length": 598.669677734375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.2332361516034984, "grad_norm": 0.16644281148910522, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 202875604.0, "reward": 0.5948660969734192, "reward_std": 0.17609839141368866, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.4909864366054535, "step": 337 }, { "clip_ratio/high_max": 0.0016974532263702713, "clip_ratio/high_mean": 0.0006135270086815581, "clip_ratio/low_mean": 0.0005287235017021885, "clip_ratio/low_min": 1.2040069123031572e-05, "clip_ratio/region_mean": 0.0011422505231166724, "epoch": 3.2425655976676384, "grad_norm": 0.1477290838956833, "learning_rate": 1e-06, "loss": 0.0207, "step": 338 }, { "clip_ratio/high_max": 0.0019162277458235621, "clip_ratio/high_mean": 0.0007508012877224246, "clip_ratio/low_mean": 0.0006303500140347751, "clip_ratio/low_min": 3.395771545910975e-05, "clip_ratio/region_mean": 0.0013811512981192209, "epoch": 3.2518950437317784, "grad_norm": 0.15937048196792603, "learning_rate": 1e-06, "loss": -0.0256, "step": 339 }, { "clip_ratio/high_max": 0.0016751165603636764, "clip_ratio/high_mean": 0.0006635076206293888, "clip_ratio/low_mean": 0.00049835271511256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001161860338470433, "epoch": 3.2612244897959184, "grad_norm": 0.14596296846866608, "learning_rate": 1e-06, "loss": -0.0128, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 791.2760009765625, "completions/mean_terminated_length": 616.5255737304688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.2705539358600584, "grad_norm": 0.16824442148208618, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 205341545.0, "reward": 0.58203125, "reward_std": 0.16110403835773468, "rewards/simpleverify_reward/mean": 0.58203125, "rewards/simpleverify_reward/std": 0.49329379200935364, "step": 341 }, { "clip_ratio/high_max": 0.00144154471490765, "clip_ratio/high_mean": 0.0005335453824955039, "clip_ratio/low_mean": 0.0005625255162158282, "clip_ratio/low_min": 2.4141576432157308e-05, "clip_ratio/region_mean": 0.0010960709187202156, "epoch": 3.2798833819241984, "grad_norm": 0.14344671368598938, "learning_rate": 1e-06, "loss": -0.0146, "step": 342 }, { "clip_ratio/high_max": 0.0014679402884212323, "clip_ratio/high_mean": 0.0005841437468916411, "clip_ratio/low_mean": 0.0005644281254717498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011485719041957054, "epoch": 3.2892128279883384, "grad_norm": 0.13074646890163422, "learning_rate": 1e-06, "loss": -0.0122, "step": 343 }, { "clip_ratio/high_max": 0.0017462081887060776, "clip_ratio/high_mean": 0.0006518919890368124, "clip_ratio/low_mean": 0.0006198894793669751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012717814606730826, "epoch": 3.298542274052478, "grad_norm": 1.319486141204834, "learning_rate": 1e-06, "loss": -0.0057, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 746.591552734375, "completions/mean_terminated_length": 603.3377685546875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 3.307871720116618, "grad_norm": 0.16908806562423706, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 207771369.0, "reward": 0.6188616156578064, "reward_std": 0.17556901276111603, "rewards/simpleverify_reward/mean": 0.6188616156578064, "rewards/simpleverify_reward/std": 0.485734224319458, "step": 345 }, { "clip_ratio/high_max": 0.0020696305509773083, "clip_ratio/high_mean": 0.000869048984895926, "clip_ratio/low_mean": 0.0004361812016213662, "clip_ratio/low_min": 1.9224691641284153e-05, "clip_ratio/region_mean": 0.0013052301947027445, "epoch": 3.317201166180758, "grad_norm": 0.15253499150276184, "learning_rate": 1e-06, "loss": -0.0506, "step": 346 }, { "clip_ratio/high_max": 0.0016605218406766653, "clip_ratio/high_mean": 0.0006394154843292199, "clip_ratio/low_mean": 0.0004594073816406308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001098822871426819, "epoch": 3.326530612244898, "grad_norm": 0.14697429537773132, "learning_rate": 1e-06, "loss": 0.0007, "step": 347 }, { "clip_ratio/high_max": 0.0017449712031520903, "clip_ratio/high_mean": 0.0006544942880282179, "clip_ratio/low_mean": 0.0005992923224766855, "clip_ratio/low_min": 7.327077946683858e-05, "clip_ratio/region_mean": 0.0012537865950434934, "epoch": 3.335860058309038, "grad_norm": 0.14421366155147552, "learning_rate": 1e-06, "loss": 0.0277, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0418526785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 742.9096069335938, "completions/mean_terminated_length": 596.4437866210938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.345189504373178, "grad_norm": 0.16033171117305756, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 210189677.0, "reward": 0.6286272406578064, "reward_std": 0.1753251999616623, "rewards/simpleverify_reward/mean": 0.6286272406578064, "rewards/simpleverify_reward/std": 0.4832392632961273, "step": 349 }, { "clip_ratio/high_max": 0.0015237104380503297, "clip_ratio/high_mean": 0.0006417155491362792, "clip_ratio/low_mean": 0.0004479941408135346, "clip_ratio/low_min": 2.2179858206072822e-05, "clip_ratio/region_mean": 0.001089709698135266, "epoch": 3.354518950437318, "grad_norm": 0.13921375572681427, "learning_rate": 1e-06, "loss": 0.0109, "step": 350 }, { "clip_ratio/high_max": 0.001970477773284074, "clip_ratio/high_mean": 0.0008282089820568217, "clip_ratio/low_mean": 0.00047458129938604543, "clip_ratio/low_min": 1.0345968803449068e-05, "clip_ratio/region_mean": 0.001302790260524489, "epoch": 3.363848396501458, "grad_norm": 0.14805352687835693, "learning_rate": 1e-06, "loss": -0.0373, "step": 351 }, { "clip_ratio/high_max": 0.0019470880943117663, "clip_ratio/high_mean": 0.0006964418789721094, "clip_ratio/low_mean": 0.0005613388066194602, "clip_ratio/low_min": 2.611238778627012e-05, "clip_ratio/region_mean": 0.0012577806992339902, "epoch": 3.373177842565598, "grad_norm": 0.155088871717453, "learning_rate": 1e-06, "loss": 0.0289, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046595982142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 755.656005859375, "completions/mean_terminated_length": 592.4024047851562, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.3825072886297374, "grad_norm": 0.1511949747800827, "learning_rate": 1e-06, "loss": -0.0077, "num_tokens": 212572972.0, "reward": 0.6090959906578064, "reward_std": 0.1723087877035141, "rewards/simpleverify_reward/mean": 0.6090959906578064, "rewards/simpleverify_reward/std": 0.48802101612091064, "step": 353 }, { "clip_ratio/high_max": 0.0019328430571476929, "clip_ratio/high_mean": 0.0007563271137769334, "clip_ratio/low_mean": 0.00047994476426538313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012362718625809066, "epoch": 3.3918367346938774, "grad_norm": 0.17145977914333344, "learning_rate": 1e-06, "loss": -0.0301, "step": 354 }, { "clip_ratio/high_max": 0.0016432817647000775, "clip_ratio/high_mean": 0.0006378463231158094, "clip_ratio/low_mean": 0.00041417255852138624, "clip_ratio/low_min": 7.787191862007603e-06, "clip_ratio/region_mean": 0.0010520188916416373, "epoch": 3.4011661807580174, "grad_norm": 0.15491217374801636, "learning_rate": 1e-06, "loss": -0.0008, "step": 355 }, { "clip_ratio/high_max": 0.0017441053460061084, "clip_ratio/high_mean": 0.000696686793162371, "clip_ratio/low_mean": 0.0005884626734768972, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012851494611823, "epoch": 3.4104956268221573, "grad_norm": 0.15466101467609406, "learning_rate": 1e-06, "loss": 0.002, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0418526785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 743.4559326171875, "completions/mean_terminated_length": 597.0139770507812, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 3.4198250728862973, "grad_norm": 0.1712491363286972, "learning_rate": 1e-06, "loss": -0.0258, "num_tokens": 215007758.0, "reward": 0.5948660969734192, "reward_std": 0.17359857261180878, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.4909864366054535, "step": 357 }, { "clip_ratio/high_max": 0.0017493488376203459, "clip_ratio/high_mean": 0.0006571534941031132, "clip_ratio/low_mean": 0.0005196538349991897, "clip_ratio/low_min": 4.691276262747124e-05, "clip_ratio/region_mean": 0.0011768073127313983, "epoch": 3.4291545189504373, "grad_norm": 0.15277433395385742, "learning_rate": 1e-06, "loss": 0.003, "step": 358 }, { "clip_ratio/high_max": 0.0020389254932524636, "clip_ratio/high_mean": 0.0007624715181009378, "clip_ratio/low_mean": 0.00046846076929796254, "clip_ratio/low_min": 2.3642896849196404e-05, "clip_ratio/region_mean": 0.0012309322955843527, "epoch": 3.4384839650145773, "grad_norm": 0.16128332912921906, "learning_rate": 1e-06, "loss": -0.0264, "step": 359 }, { "clip_ratio/high_max": 0.0018701283843256533, "clip_ratio/high_mean": 0.0006704828483634628, "clip_ratio/low_mean": 0.0006096422157497727, "clip_ratio/low_min": 5.7591711993154604e-05, "clip_ratio/region_mean": 0.0012801250486518256, "epoch": 3.4478134110787173, "grad_norm": 0.16498444974422455, "learning_rate": 1e-06, "loss": 0.0306, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.043247767857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 739.0377197265625, "completions/mean_terminated_length": 587.2939453125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.4571428571428573, "grad_norm": 0.1555006057024002, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 217378021.0, "reward": 0.6263951063156128, "reward_std": 0.1634603589773178, "rewards/simpleverify_reward/mean": 0.6263951063156128, "rewards/simpleverify_reward/std": 0.483828067779541, "step": 361 }, { "clip_ratio/high_max": 0.0019356006960151717, "clip_ratio/high_mean": 0.000708508952811826, "clip_ratio/low_mean": 0.0004853295799875923, "clip_ratio/low_min": 2.066798879241105e-05, "clip_ratio/region_mean": 0.001193838514154777, "epoch": 3.466472303206997, "grad_norm": 0.17325197160243988, "learning_rate": 1e-06, "loss": -0.0115, "step": 362 }, { "clip_ratio/high_max": 0.001908083475427702, "clip_ratio/high_mean": 0.000746917843571282, "clip_ratio/low_mean": 0.0005170067543076584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012639245796890464, "epoch": 3.4758017492711373, "grad_norm": 0.16269312798976898, "learning_rate": 1e-06, "loss": -0.0214, "step": 363 }, { "clip_ratio/high_max": 0.0017954828654183075, "clip_ratio/high_mean": 0.0007158147109294077, "clip_ratio/low_mean": 0.0006430930698115844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013589077643700875, "epoch": 3.485131195335277, "grad_norm": 0.1520288586616516, "learning_rate": 1e-06, "loss": 0.0072, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.050502232142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 789.2935791015625, "completions/mean_terminated_length": 613.4152221679688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 3.494460641399417, "grad_norm": 0.16798296570777893, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 219825745.0, "reward": 0.5984933376312256, "reward_std": 0.1660442054271698, "rewards/simpleverify_reward/mean": 0.5984932780265808, "rewards/simpleverify_reward/std": 0.4902714788913727, "step": 365 }, { "clip_ratio/high_max": 0.0019970216671936214, "clip_ratio/high_mean": 0.0007460974011337385, "clip_ratio/low_mean": 0.0004188150173831673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011649124244286213, "epoch": 3.503790087463557, "grad_norm": 0.13869068026542664, "learning_rate": 1e-06, "loss": -0.0427, "step": 366 }, { "clip_ratio/high_max": 0.002121555033227196, "clip_ratio/high_mean": 0.0007596993546030717, "clip_ratio/low_mean": 0.0004757608226100274, "clip_ratio/low_min": 2.828029028023593e-05, "clip_ratio/region_mean": 0.00123546018221532, "epoch": 3.513119533527697, "grad_norm": 0.15662145614624023, "learning_rate": 1e-06, "loss": -0.0332, "step": 367 }, { "clip_ratio/high_max": 0.0016614125815976877, "clip_ratio/high_mean": 0.0006113265144449542, "clip_ratio/low_mean": 0.0004980022154086328, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011093287357653026, "epoch": 3.522448979591837, "grad_norm": 0.1438027024269104, "learning_rate": 1e-06, "loss": 0.0185, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 753.8460083007812, "completions/mean_terminated_length": 577.1163330078125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 3.5317784256559768, "grad_norm": 0.17979514598846436, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 222153057.0, "reward": 0.6205357313156128, "reward_std": 0.1813025176525116, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.48532140254974365, "step": 369 }, { "clip_ratio/high_max": 0.002096397598506883, "clip_ratio/high_mean": 0.0007771404707455076, "clip_ratio/low_mean": 0.0005500991755980067, "clip_ratio/low_min": 2.437080638628686e-05, "clip_ratio/region_mean": 0.0013272396427055355, "epoch": 3.5411078717201168, "grad_norm": 0.1808949112892151, "learning_rate": 1e-06, "loss": -0.0079, "step": 370 }, { "clip_ratio/high_max": 0.00196584432706004, "clip_ratio/high_mean": 0.000843215342683834, "clip_ratio/low_mean": 0.0004510059052336146, "clip_ratio/low_min": 3.11798467009794e-05, "clip_ratio/region_mean": 0.0012942212488269433, "epoch": 3.5504373177842563, "grad_norm": 1.917815089225769, "learning_rate": 1e-06, "loss": -0.0211, "step": 371 }, { "clip_ratio/high_max": 0.002029543880780693, "clip_ratio/high_mean": 0.0008524623408447951, "clip_ratio/low_mean": 0.0005110370248075924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013634993629239034, "epoch": 3.5597667638483967, "grad_norm": 0.1599896401166916, "learning_rate": 1e-06, "loss": -0.0329, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.043247767857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 721.3362426757812, "completions/mean_terminated_length": 568.7923583984375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.5690962099125363, "grad_norm": 0.16206678748130798, "learning_rate": 1e-06, "loss": 0.028, "num_tokens": 224477078.0, "reward": 0.6110491156578064, "reward_std": 0.16751761734485626, "rewards/simpleverify_reward/mean": 0.6110491156578064, "rewards/simpleverify_reward/std": 0.48758018016815186, "step": 373 }, { "clip_ratio/high_max": 0.001990405222386471, "clip_ratio/high_mean": 0.0006908559130351932, "clip_ratio/low_mean": 0.0005033727957197698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011942286910198163, "epoch": 3.5784256559766763, "grad_norm": 0.1806079000234604, "learning_rate": 1e-06, "loss": -0.0075, "step": 374 }, { "clip_ratio/high_max": 0.0016928724180615973, "clip_ratio/high_mean": 0.0006595496779482346, "clip_ratio/low_mean": 0.00048518912990402896, "clip_ratio/low_min": 4.890343825536547e-05, "clip_ratio/region_mean": 0.0011447388023952954, "epoch": 3.5877551020408163, "grad_norm": 0.15915532410144806, "learning_rate": 1e-06, "loss": -0.0208, "step": 375 }, { "clip_ratio/high_max": 0.0017365136482112575, "clip_ratio/high_mean": 0.0007073524975567125, "clip_ratio/low_mean": 0.0006399440840141324, "clip_ratio/low_min": 6.394718366209418e-05, "clip_ratio/region_mean": 0.0013472965620167088, "epoch": 3.5970845481049563, "grad_norm": 0.17882317304611206, "learning_rate": 1e-06, "loss": -0.0198, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0440848214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 738.663818359375, "completions/mean_terminated_length": 583.8303833007812, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.6064139941690962, "grad_norm": 0.17072638869285583, "learning_rate": 1e-06, "loss": -0.0512, "num_tokens": 226829609.0, "reward": 0.6183035969734192, "reward_std": 0.17573967576026917, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.48587048053741455, "step": 377 }, { "clip_ratio/high_max": 0.0016286867285089102, "clip_ratio/high_mean": 0.0007230995397549123, "clip_ratio/low_mean": 0.0005177054285923077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012408049660734832, "epoch": 3.6157434402332362, "grad_norm": 0.15557238459587097, "learning_rate": 1e-06, "loss": -0.031, "step": 378 }, { "clip_ratio/high_max": 0.0016644917050143704, "clip_ratio/high_mean": 0.0007506675574404653, "clip_ratio/low_mean": 0.0005514921804206097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013021597187616862, "epoch": 3.6250728862973762, "grad_norm": 0.14364144206047058, "learning_rate": 1e-06, "loss": -0.0254, "step": 379 }, { "clip_ratio/high_max": 0.0016744280437706038, "clip_ratio/high_mean": 0.0007235015291371383, "clip_ratio/low_mean": 0.0006650154318776913, "clip_ratio/low_min": 1.2718763173324987e-05, "clip_ratio/region_mean": 0.0013885169792047236, "epoch": 3.6344023323615158, "grad_norm": 0.14697742462158203, "learning_rate": 1e-06, "loss": 0.0217, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0532924107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 778.0949096679688, "completions/mean_terminated_length": 591.3220825195312, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 3.643731778425656, "grad_norm": 0.16023840010166168, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 229196909.0, "reward": 0.6141183376312256, "reward_std": 0.1779097020626068, "rewards/simpleverify_reward/mean": 0.6141182780265808, "rewards/simpleverify_reward/std": 0.48687076568603516, "step": 381 }, { "clip_ratio/high_max": 0.0018544440172263421, "clip_ratio/high_mean": 0.0007574325099994894, "clip_ratio/low_mean": 0.0005108101868245285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012682426786341239, "epoch": 3.6530612244897958, "grad_norm": 0.1538790762424469, "learning_rate": 1e-06, "loss": -0.0248, "step": 382 }, { "clip_ratio/high_max": 0.001988415126106702, "clip_ratio/high_mean": 0.0007800547009537695, "clip_ratio/low_mean": 0.0005963827597952331, "clip_ratio/low_min": 8.278145287476946e-06, "clip_ratio/region_mean": 0.0013764374562015291, "epoch": 3.6623906705539357, "grad_norm": 0.18778975307941437, "learning_rate": 1e-06, "loss": 0.0224, "step": 383 }, { "clip_ratio/high_max": 0.0021543004259001464, "clip_ratio/high_mean": 0.0008432131016888889, "clip_ratio/low_mean": 0.00041047519243875286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001253688293218147, "epoch": 3.6717201166180757, "grad_norm": 0.16296695172786713, "learning_rate": 1e-06, "loss": -0.0641, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0415736607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 743.0803833007812, "completions/mean_terminated_length": 597.6407470703125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.6810495626822157, "grad_norm": 0.17048579454421997, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 231601861.0, "reward": 0.6171875, "reward_std": 0.16816602647304535, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4861409664154053, "step": 385 }, { "clip_ratio/high_max": 0.0015923400133033283, "clip_ratio/high_mean": 0.0006491670274044736, "clip_ratio/low_mean": 0.0004673092471421114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001116476265451638, "epoch": 3.6903790087463557, "grad_norm": 0.14486359059810638, "learning_rate": 1e-06, "loss": -0.0221, "step": 386 }, { "clip_ratio/high_max": 0.0015841332096897531, "clip_ratio/high_mean": 0.0005892618537473027, "clip_ratio/low_mean": 0.0005924615425101365, "clip_ratio/low_min": 1.5085686754900962e-05, "clip_ratio/region_mean": 0.0011817233971669339, "epoch": 3.6997084548104957, "grad_norm": 0.13881024718284607, "learning_rate": 1e-06, "loss": -0.0113, "step": 387 }, { "clip_ratio/high_max": 0.0016619636917312164, "clip_ratio/high_mean": 0.0006945717977941968, "clip_ratio/low_mean": 0.0006583322210644837, "clip_ratio/low_min": 3.791436301980866e-05, "clip_ratio/region_mean": 0.0013529040188586805, "epoch": 3.7090379008746357, "grad_norm": 0.1617782711982727, "learning_rate": 1e-06, "loss": 0.0067, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0404575892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 736.8108520507812, "completions/mean_terminated_length": 595.1759033203125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.7183673469387752, "grad_norm": 0.1626833975315094, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 233981127.0, "reward": 0.6598772406578064, "reward_std": 0.15166401863098145, "rewards/simpleverify_reward/mean": 0.6598772406578064, "rewards/simpleverify_reward/std": 0.4738163352012634, "step": 389 }, { "clip_ratio/high_max": 0.0014538203613483347, "clip_ratio/high_mean": 0.0005164735912330798, "clip_ratio/low_mean": 0.0005091534912935458, "clip_ratio/low_min": 2.6694317966757808e-05, "clip_ratio/region_mean": 0.0010256270870740991, "epoch": 3.7276967930029157, "grad_norm": 0.15822646021842957, "learning_rate": 1e-06, "loss": 0.0398, "step": 390 }, { "clip_ratio/high_max": 0.0016957730331341736, "clip_ratio/high_mean": 0.0006881671288283542, "clip_ratio/low_mean": 0.00040055199178823386, "clip_ratio/low_min": 1.2999167665839195e-05, "clip_ratio/region_mean": 0.0010887190983339678, "epoch": 3.7370262390670552, "grad_norm": 0.13440728187561035, "learning_rate": 1e-06, "loss": 0.0009, "step": 391 }, { "clip_ratio/high_max": 0.0017876268721011002, "clip_ratio/high_mean": 0.0007208832212199923, "clip_ratio/low_mean": 0.0004176313696007128, "clip_ratio/low_min": 1.8830973203876056e-05, "clip_ratio/region_mean": 0.0011385145917301998, "epoch": 3.746355685131195, "grad_norm": 0.1990271806716919, "learning_rate": 1e-06, "loss": -0.0338, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 758.7799072265625, "completions/mean_terminated_length": 602.8349609375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.755685131195335, "grad_norm": 0.15007519721984863, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 236397074.0, "reward": 0.6342076063156128, "reward_std": 0.17257222533226013, "rewards/simpleverify_reward/mean": 0.6342076063156128, "rewards/simpleverify_reward/std": 0.48171886801719666, "step": 393 }, { "clip_ratio/high_max": 0.0019609399350883905, "clip_ratio/high_mean": 0.0006857290136395022, "clip_ratio/low_mean": 0.0005657488441102032, "clip_ratio/low_min": 3.0051184694457334e-05, "clip_ratio/region_mean": 0.0012514778281911276, "epoch": 3.765014577259475, "grad_norm": 0.15615324676036835, "learning_rate": 1e-06, "loss": 0.0137, "step": 394 }, { "clip_ratio/high_max": 0.0019886954069079366, "clip_ratio/high_mean": 0.0007328978390432894, "clip_ratio/low_mean": 0.000493382311105961, "clip_ratio/low_min": 1.633986903470941e-05, "clip_ratio/region_mean": 0.0012262801319593564, "epoch": 3.774344023323615, "grad_norm": 0.15589962899684906, "learning_rate": 1e-06, "loss": -0.0006, "step": 395 }, { "clip_ratio/high_max": 0.0017391549699823372, "clip_ratio/high_mean": 0.0006498051825474249, "clip_ratio/low_mean": 0.0004904703673673794, "clip_ratio/low_min": 1.618751593923662e-05, "clip_ratio/region_mean": 0.0011402755189919844, "epoch": 3.783673469387755, "grad_norm": 0.15151771903038025, "learning_rate": 1e-06, "loss": -0.0308, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0538504464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 784.2442016601562, "completions/mean_terminated_length": 595.7543334960938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.793002915451895, "grad_norm": 0.16946612298488617, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 238744509.0, "reward": 0.6051897406578064, "reward_std": 0.1789143830537796, "rewards/simpleverify_reward/mean": 0.6051897406578064, "rewards/simpleverify_reward/std": 0.4888781011104584, "step": 397 }, { "clip_ratio/high_max": 0.0017455039960623253, "clip_ratio/high_mean": 0.0006547926514031133, "clip_ratio/low_mean": 0.0005083036330688628, "clip_ratio/low_min": 1.3366125131142326e-05, "clip_ratio/region_mean": 0.0011630962944764178, "epoch": 3.8023323615160347, "grad_norm": 0.15056553483009338, "learning_rate": 1e-06, "loss": -0.0037, "step": 398 }, { "clip_ratio/high_max": 0.0019109847125946544, "clip_ratio/high_mean": 0.0007033257879811572, "clip_ratio/low_mean": 0.0005400387572080945, "clip_ratio/low_min": 3.2064881452242844e-05, "clip_ratio/region_mean": 0.0012433645279088523, "epoch": 3.811661807580175, "grad_norm": 0.1735132932662964, "learning_rate": 1e-06, "loss": -0.003, "step": 399 }, { "clip_ratio/high_max": 0.001825780695071444, "clip_ratio/high_mean": 0.0007096084227669053, "clip_ratio/low_mean": 0.0005670290756825125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012766375293722376, "epoch": 3.8209912536443147, "grad_norm": 0.2980648875236511, "learning_rate": 1e-06, "loss": -0.0058, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047154017857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 730.1213989257812, "completions/mean_terminated_length": 563.5523071289062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.8303206997084547, "grad_norm": 0.1497042328119278, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 241009232.0, "reward": 0.6389509439468384, "reward_std": 0.1365777552127838, "rewards/simpleverify_reward/mean": 0.6389508843421936, "rewards/simpleverify_reward/std": 0.48037174344062805, "step": 401 }, { "clip_ratio/high_max": 0.0015567933369311504, "clip_ratio/high_mean": 0.0005930696561335935, "clip_ratio/low_mean": 0.00040516785065847216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009982375231629703, "epoch": 3.8396501457725947, "grad_norm": 0.16314265131950378, "learning_rate": 1e-06, "loss": 0.018, "step": 402 }, { "clip_ratio/high_max": 0.0014318090743472567, "clip_ratio/high_mean": 0.0004980476583114068, "clip_ratio/low_mean": 0.00037245650855766144, "clip_ratio/low_min": 2.864344605768565e-05, "clip_ratio/region_mean": 0.000870504160047858, "epoch": 3.8489795918367347, "grad_norm": 0.13035763800144196, "learning_rate": 1e-06, "loss": 0.0073, "step": 403 }, { "clip_ratio/high_max": 0.001735569429001771, "clip_ratio/high_mean": 0.0006568533081008354, "clip_ratio/low_mean": 0.00045213176463221316, "clip_ratio/low_min": 1.1138834452140145e-05, "clip_ratio/region_mean": 0.0011089850595453754, "epoch": 3.8583090379008746, "grad_norm": 0.1565546691417694, "learning_rate": 1e-06, "loss": -0.0309, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 763.6029663085938, "completions/mean_terminated_length": 579.1239624023438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.8676384839650146, "grad_norm": 0.1716858446598053, "learning_rate": 1e-06, "loss": -0.0421, "num_tokens": 243344881.0, "reward": 0.6088169813156128, "reward_std": 0.17893417179584503, "rewards/simpleverify_reward/mean": 0.6088169813156128, "rewards/simpleverify_reward/std": 0.4880833327770233, "step": 405 }, { "clip_ratio/high_max": 0.0016960903703875374, "clip_ratio/high_mean": 0.0005844097331646481, "clip_ratio/low_mean": 0.0006910672291269293, "clip_ratio/low_min": 6.186592509038746e-05, "clip_ratio/region_mean": 0.0012754769777529873, "epoch": 3.8769679300291546, "grad_norm": 0.2018299549818039, "learning_rate": 1e-06, "loss": 0.032, "step": 406 }, { "clip_ratio/high_max": 0.0016597934773017187, "clip_ratio/high_mean": 0.0006484699324573739, "clip_ratio/low_mean": 0.0005640515082632191, "clip_ratio/low_min": 2.0538942408165894e-05, "clip_ratio/region_mean": 0.0012125214489060454, "epoch": 3.8862973760932946, "grad_norm": 0.1596388965845108, "learning_rate": 1e-06, "loss": -0.0287, "step": 407 }, { "clip_ratio/high_max": 0.0020897839713143185, "clip_ratio/high_mean": 0.0007247646371979499, "clip_ratio/low_mean": 0.000726601359929191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014513660207740031, "epoch": 3.8956268221574346, "grad_norm": 0.18352805078029633, "learning_rate": 1e-06, "loss": -0.0051, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0555245535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 770.306396484375, "completions/mean_terminated_length": 574.7929077148438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.904956268221574, "grad_norm": 0.1877947300672531, "learning_rate": 1e-06, "loss": -0.0085, "num_tokens": 245648451.0, "reward": 0.6180245876312256, "reward_std": 0.1850578337907791, "rewards/simpleverify_reward/mean": 0.6180245280265808, "rewards/simpleverify_reward/std": 0.48593834042549133, "step": 409 }, { "clip_ratio/high_max": 0.0015838508479646407, "clip_ratio/high_mean": 0.0006983145231060917, "clip_ratio/low_mean": 0.0005505159879248822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012488305183069315, "epoch": 3.914285714285714, "grad_norm": 0.1577041894197464, "learning_rate": 1e-06, "loss": -0.0504, "step": 410 }, { "clip_ratio/high_max": 0.0017843581590568647, "clip_ratio/high_mean": 0.0008026813466130989, "clip_ratio/low_mean": 0.0006105748398113064, "clip_ratio/low_min": 1.0670992196537554e-05, "clip_ratio/region_mean": 0.0014132561664155219, "epoch": 3.923615160349854, "grad_norm": 0.1861371099948883, "learning_rate": 1e-06, "loss": -0.0111, "step": 411 }, { "clip_ratio/high_max": 0.0017410483087587636, "clip_ratio/high_mean": 0.0006244772512218333, "clip_ratio/low_mean": 0.0006662032237727544, "clip_ratio/low_min": 1.4450866729021072e-05, "clip_ratio/region_mean": 0.001290680458623683, "epoch": 3.932944606413994, "grad_norm": 0.15980687737464905, "learning_rate": 1e-06, "loss": -0.0125, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0541294642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 769.2520141601562, "completions/mean_terminated_length": 578.8717041015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.942274052478134, "grad_norm": 0.16674727201461792, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 247959098.0, "reward": 0.6272321939468384, "reward_std": 0.16054844856262207, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.48360857367515564, "step": 413 }, { "clip_ratio/high_max": 0.0014576266330550425, "clip_ratio/high_mean": 0.0005690766975021688, "clip_ratio/low_mean": 0.0003882161404362705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009572928502166178, "epoch": 3.951603498542274, "grad_norm": 0.1270017772912979, "learning_rate": 1e-06, "loss": -0.0328, "step": 414 }, { "clip_ratio/high_max": 0.002029586576099973, "clip_ratio/high_mean": 0.0007200598156487104, "clip_ratio/low_mean": 0.0004525789336184971, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011726387310773134, "epoch": 3.960932944606414, "grad_norm": 0.14874887466430664, "learning_rate": 1e-06, "loss": -0.0451, "step": 415 }, { "clip_ratio/high_max": 0.001837100524426205, "clip_ratio/high_mean": 0.0006785253644920886, "clip_ratio/low_mean": 0.0005010693730582716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011795947357313707, "epoch": 3.970262390670554, "grad_norm": 0.16166284680366516, "learning_rate": 1e-06, "loss": -0.0242, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 761.6900634765625, "completions/mean_terminated_length": 581.2449951171875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 4.0093294460641395, "grad_norm": 0.2930557131767273, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 250278539.0, "reward": 0.6473214626312256, "reward_std": 0.15348640084266663, "rewards/simpleverify_reward/mean": 0.6473214030265808, "rewards/simpleverify_reward/std": 0.4778704047203064, "step": 417 }, { "clip_ratio/high_max": 0.0018760299572022632, "clip_ratio/high_mean": 0.0007250055077747675, "clip_ratio/low_mean": 0.0004588619385685888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011838674727187026, "epoch": 4.01865889212828, "grad_norm": 0.1386263370513916, "learning_rate": 1e-06, "loss": -0.0587, "step": 418 }, { "clip_ratio/high_max": 0.001587025970366085, "clip_ratio/high_mean": 0.0006180535292514833, "clip_ratio/low_mean": 0.00048029840354502085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010983519387082197, "epoch": 4.0279883381924195, "grad_norm": 0.15032194554805756, "learning_rate": 1e-06, "loss": 0.0129, "step": 419 }, { "clip_ratio/high_max": 0.001646757282287581, "clip_ratio/high_mean": 0.0006925615616637515, "clip_ratio/low_mean": 0.0005373675812734291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001229929126566276, "epoch": 4.03731778425656, "grad_norm": 0.15123619139194489, "learning_rate": 1e-06, "loss": -0.0283, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0652901785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 813.5614013671875, "completions/mean_terminated_length": 584.2805786132812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 4.0466472303206995, "grad_norm": 0.1842910498380661, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 252595111.0, "reward": 0.6305803656578064, "reward_std": 0.15976960957050323, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.48271501064300537, "step": 421 }, { "clip_ratio/high_max": 0.0017369119632348884, "clip_ratio/high_mean": 0.000646495059299923, "clip_ratio/low_mean": 0.0005284994695102796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011749945533665596, "epoch": 4.05597667638484, "grad_norm": 1.5191673040390015, "learning_rate": 1e-06, "loss": -0.0137, "step": 422 }, { "clip_ratio/high_max": 0.0016570338048040867, "clip_ratio/high_mean": 0.0006248306344787125, "clip_ratio/low_mean": 0.0005507421840320603, "clip_ratio/low_min": 2.894858698709868e-05, "clip_ratio/region_mean": 0.0011755728373827878, "epoch": 4.0653061224489795, "grad_norm": 0.15385717153549194, "learning_rate": 1e-06, "loss": -0.0176, "step": 423 }, { "clip_ratio/high_max": 0.00166431979596382, "clip_ratio/high_mean": 0.0006832070030213799, "clip_ratio/low_mean": 0.0005512700054168818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001234476971148979, "epoch": 4.07463556851312, "grad_norm": 0.18055982887744904, "learning_rate": 1e-06, "loss": -0.035, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0650111607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 841.884521484375, "completions/mean_terminated_length": 615.6210327148438, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 4.0839650145772595, "grad_norm": 0.1586819291114807, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 255001609.0, "reward": 0.5842634439468384, "reward_std": 0.1779743731021881, "rewards/simpleverify_reward/mean": 0.5842633843421936, "rewards/simpleverify_reward/std": 0.4929172992706299, "step": 425 }, { "clip_ratio/high_max": 0.0017185499091283418, "clip_ratio/high_mean": 0.0007025417398835998, "clip_ratio/low_mean": 0.00041725175515239243, "clip_ratio/low_min": 2.6774790057970677e-05, "clip_ratio/region_mean": 0.0011197934800293297, "epoch": 4.093294460641399, "grad_norm": 0.15585656464099884, "learning_rate": 1e-06, "loss": -0.0224, "step": 426 }, { "clip_ratio/high_max": 0.002055002005363349, "clip_ratio/high_mean": 0.0007782145912642591, "clip_ratio/low_mean": 0.0005176506031148165, "clip_ratio/low_min": 5.3252169891493395e-05, "clip_ratio/region_mean": 0.0012958651896042284, "epoch": 4.1026239067055394, "grad_norm": 0.15254971385002136, "learning_rate": 1e-06, "loss": -0.0143, "step": 427 }, { "clip_ratio/high_max": 0.0018155363504774868, "clip_ratio/high_mean": 0.0007737217056273948, "clip_ratio/low_mean": 0.0004892905608357978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012630122400878463, "epoch": 4.111953352769679, "grad_norm": 0.1646735519170761, "learning_rate": 1e-06, "loss": 0.0022, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.054408482142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 794.0184326171875, "completions/mean_terminated_length": 604.025390625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 4.121282798833819, "grad_norm": 0.15853774547576904, "learning_rate": 1e-06, "loss": -0.0637, "num_tokens": 257418131.0, "reward": 0.626953125, "reward_std": 0.1574157178401947, "rewards/simpleverify_reward/mean": 0.626953125, "rewards/simpleverify_reward/std": 0.48368188738822937, "step": 429 }, { "clip_ratio/high_max": 0.0018694005266297609, "clip_ratio/high_mean": 0.0006588938231288921, "clip_ratio/low_mean": 0.00044861652531835716, "clip_ratio/low_min": 2.5494595320196822e-05, "clip_ratio/region_mean": 0.0011075103393523023, "epoch": 4.130612244897959, "grad_norm": 0.17155718803405762, "learning_rate": 1e-06, "loss": -0.0313, "step": 430 }, { "clip_ratio/high_max": 0.0014508873573504388, "clip_ratio/high_mean": 0.0005630661416944349, "clip_ratio/low_mean": 0.0004963162245985586, "clip_ratio/low_min": 7.975792141223792e-05, "clip_ratio/region_mean": 0.0010593823535600677, "epoch": 4.139941690962099, "grad_norm": 0.22966422140598297, "learning_rate": 1e-06, "loss": -0.0027, "step": 431 }, { "clip_ratio/high_max": 0.0015701907650509384, "clip_ratio/high_mean": 0.0005594238764388137, "clip_ratio/low_mean": 0.0006066989790269872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011661228454613592, "epoch": 4.149271137026239, "grad_norm": 0.14736908674240112, "learning_rate": 1e-06, "loss": 0.0238, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0521763392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3760.0, "completions/mean_length": 766.5254516601562, "completions/mean_terminated_length": 583.2425537109375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.158600583090379, "grad_norm": 0.18092715740203857, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 259762950.0, "reward": 0.6434152126312256, "reward_std": 0.1780482828617096, "rewards/simpleverify_reward/mean": 0.6434151530265808, "rewards/simpleverify_reward/std": 0.47905752062797546, "step": 433 }, { "clip_ratio/high_max": 0.0017762898933142424, "clip_ratio/high_mean": 0.0007695888434682274, "clip_ratio/low_mean": 0.0004996745956304949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012692634627455845, "epoch": 4.167930029154519, "grad_norm": 0.1713247150182724, "learning_rate": 1e-06, "loss": -0.0389, "step": 434 }, { "clip_ratio/high_max": 0.0020635693763324525, "clip_ratio/high_mean": 0.0008318556683661882, "clip_ratio/low_mean": 0.000595302307374368, "clip_ratio/low_min": 3.1328319892054424e-05, "clip_ratio/region_mean": 0.001427157963917125, "epoch": 4.1772594752186585, "grad_norm": 0.1598271131515503, "learning_rate": 1e-06, "loss": 0.0059, "step": 435 }, { "clip_ratio/high_max": 0.0018895576722570695, "clip_ratio/high_mean": 0.0007557596873084549, "clip_ratio/low_mean": 0.0005899570969631895, "clip_ratio/low_min": 7.403040672215866e-05, "clip_ratio/region_mean": 0.0013457167806336656, "epoch": 4.186588921282799, "grad_norm": 0.16085202991962433, "learning_rate": 1e-06, "loss": 0.0051, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0560825892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3466.0, "completions/mean_length": 776.200927734375, "completions/mean_terminated_length": 578.9559326171875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 4.1959183673469385, "grad_norm": 0.1625591665506363, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 262077502.0, "reward": 0.625, "reward_std": 0.16726596653461456, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.4841904640197754, "step": 437 }, { "clip_ratio/high_max": 0.0015266390437318478, "clip_ratio/high_mean": 0.0006601621807931224, "clip_ratio/low_mean": 0.00046222445780585986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011223866531508975, "epoch": 4.205247813411079, "grad_norm": 0.1405790001153946, "learning_rate": 1e-06, "loss": -0.0476, "step": 438 }, { "clip_ratio/high_max": 0.00178735962981591, "clip_ratio/high_mean": 0.0006470835760410409, "clip_ratio/low_mean": 0.0005345592323919846, "clip_ratio/low_min": 1.1627907042566221e-05, "clip_ratio/region_mean": 0.0011816428523161449, "epoch": 4.214577259475218, "grad_norm": 0.1596137434244156, "learning_rate": 1e-06, "loss": -0.0074, "step": 439 }, { "clip_ratio/high_max": 0.0020581719509209506, "clip_ratio/high_mean": 0.000752911311792559, "clip_ratio/low_mean": 0.0005543289480556268, "clip_ratio/low_min": 1.7198679415741935e-05, "clip_ratio/region_mean": 0.0013072402725811116, "epoch": 4.223906705539359, "grad_norm": 0.1790420562028885, "learning_rate": 1e-06, "loss": -0.0252, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 792.8965454101562, "completions/mean_terminated_length": 587.3091430664062, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 4.233236151603498, "grad_norm": 0.15928108990192413, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 264419547.0, "reward": 0.6383928656578064, "reward_std": 0.1556195318698883, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.48053285479545593, "step": 441 }, { "clip_ratio/high_max": 0.0013958967429061886, "clip_ratio/high_mean": 0.0005795386150566628, "clip_ratio/low_mean": 0.00043241151888651075, "clip_ratio/low_min": 1.2613521903404035e-05, "clip_ratio/region_mean": 0.0010119501348526683, "epoch": 4.242565597667639, "grad_norm": 0.1355280876159668, "learning_rate": 1e-06, "loss": -0.0028, "step": 442 }, { "clip_ratio/high_max": 0.0017295235593337566, "clip_ratio/high_mean": 0.0007008741204117541, "clip_ratio/low_mean": 0.00040665567235009803, "clip_ratio/low_min": 1.1819212886621244e-05, "clip_ratio/region_mean": 0.0011075297770730685, "epoch": 4.251895043731778, "grad_norm": 0.15423093736171722, "learning_rate": 1e-06, "loss": -0.0064, "step": 443 }, { "clip_ratio/high_max": 0.001876028094557114, "clip_ratio/high_mean": 0.0007014514321781462, "clip_ratio/low_mean": 0.0004703798567788908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011718312744051218, "epoch": 4.261224489795918, "grad_norm": 0.1550929993391037, "learning_rate": 1e-06, "loss": -0.0164, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0638950892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 821.0848388671875, "completions/mean_terminated_length": 597.5511474609375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 4.270553935860058, "grad_norm": 0.18370363116264343, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 266784635.0, "reward": 0.6051897406578064, "reward_std": 0.166532963514328, "rewards/simpleverify_reward/mean": 0.6051897406578064, "rewards/simpleverify_reward/std": 0.4888781011104584, "step": 445 }, { "clip_ratio/high_max": 0.001819154331315076, "clip_ratio/high_mean": 0.0006047451629456191, "clip_ratio/low_mean": 0.0004999852676519367, "clip_ratio/low_min": 2.1136287614353932e-05, "clip_ratio/region_mean": 0.0011047304251405876, "epoch": 4.279883381924198, "grad_norm": 0.1549869179725647, "learning_rate": 1e-06, "loss": -0.0368, "step": 446 }, { "clip_ratio/high_max": 0.0017878524922707584, "clip_ratio/high_mean": 0.0006844689869467402, "clip_ratio/low_mean": 0.00046923147874622373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011537004720594268, "epoch": 4.289212827988338, "grad_norm": 0.17443861067295074, "learning_rate": 1e-06, "loss": -0.0682, "step": 447 }, { "clip_ratio/high_max": 0.001769374537616386, "clip_ratio/high_mean": 0.0006662323808086512, "clip_ratio/low_mean": 0.0006291747540672077, "clip_ratio/low_min": 3.2879397622309625e-05, "clip_ratio/region_mean": 0.0012954071426065639, "epoch": 4.298542274052478, "grad_norm": 0.1622665375471115, "learning_rate": 1e-06, "loss": 0.0405, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0571986607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 781.6596069335938, "completions/mean_terminated_length": 580.5824584960938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 4.307871720116618, "grad_norm": 0.17257143557071686, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 269109775.0, "reward": 0.6174665689468384, "reward_std": 0.17003601789474487, "rewards/simpleverify_reward/mean": 0.6174665093421936, "rewards/simpleverify_reward/std": 0.4860736131668091, "step": 449 }, { "clip_ratio/high_max": 0.0019366961460036691, "clip_ratio/high_mean": 0.0007508508451792295, "clip_ratio/low_mean": 0.0004946251783621847, "clip_ratio/low_min": 1.4884496522427071e-05, "clip_ratio/region_mean": 0.00124547603627434, "epoch": 4.317201166180758, "grad_norm": 0.18392397463321686, "learning_rate": 1e-06, "loss": -0.0124, "step": 450 }, { "clip_ratio/high_max": 0.0016112104895000812, "clip_ratio/high_mean": 0.0006982994473219151, "clip_ratio/low_mean": 0.0005165143029444152, "clip_ratio/low_min": 1.4859724615234882e-05, "clip_ratio/region_mean": 0.0012148137793701608, "epoch": 4.326530612244898, "grad_norm": 0.1385180801153183, "learning_rate": 1e-06, "loss": -0.0212, "step": 451 }, { "clip_ratio/high_max": 0.0019618884616647847, "clip_ratio/high_mean": 0.0007434283834300004, "clip_ratio/low_mean": 0.0004479230201468454, "clip_ratio/low_min": 1.6503829101566225e-05, "clip_ratio/region_mean": 0.0011913514063053299, "epoch": 4.335860058309038, "grad_norm": 0.16442346572875977, "learning_rate": 1e-06, "loss": -0.0426, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 723.0287475585938, "completions/mean_terminated_length": 552.9961547851562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.345189504373177, "grad_norm": 0.16835449635982513, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 271344278.0, "reward": 0.6643415689468384, "reward_std": 0.15220901370048523, "rewards/simpleverify_reward/mean": 0.6643415093421936, "rewards/simpleverify_reward/std": 0.47228604555130005, "step": 453 }, { "clip_ratio/high_max": 0.0018229342240374535, "clip_ratio/high_mean": 0.0008269524532806827, "clip_ratio/low_mean": 0.0004218065087115974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012487589556258172, "epoch": 4.354518950437318, "grad_norm": 0.17263202369213104, "learning_rate": 1e-06, "loss": -0.0136, "step": 454 }, { "clip_ratio/high_max": 0.0015906876942608505, "clip_ratio/high_mean": 0.0006529083375426126, "clip_ratio/low_mean": 0.0003650285343610449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010179368837270886, "epoch": 4.363848396501457, "grad_norm": 0.159929558634758, "learning_rate": 1e-06, "loss": -0.0385, "step": 455 }, { "clip_ratio/high_max": 0.0017552264289406594, "clip_ratio/high_mean": 0.000716193537300569, "clip_ratio/low_mean": 0.00046633646206828416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011825299843621906, "epoch": 4.373177842565598, "grad_norm": 0.15799127519130707, "learning_rate": 1e-06, "loss": 0.0093, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0689174107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 827.1336669921875, "completions/mean_terminated_length": 585.1768188476562, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 4.382507288629737, "grad_norm": 0.16509738564491272, "learning_rate": 1e-06, "loss": 0.0188, "num_tokens": 273672029.0, "reward": 0.6135603189468384, "reward_std": 0.15460233390331268, "rewards/simpleverify_reward/mean": 0.6135602593421936, "rewards/simpleverify_reward/std": 0.4870012700557709, "step": 457 }, { "clip_ratio/high_max": 0.0016824231388454791, "clip_ratio/high_mean": 0.0006363692427839851, "clip_ratio/low_mean": 0.0004770959512825357, "clip_ratio/low_min": 3.4780190617311746e-05, "clip_ratio/region_mean": 0.0011134652158943936, "epoch": 4.391836734693878, "grad_norm": 0.17502401769161224, "learning_rate": 1e-06, "loss": -0.039, "step": 458 }, { "clip_ratio/high_max": 0.0021970111411064863, "clip_ratio/high_mean": 0.0007737250361969927, "clip_ratio/low_mean": 0.0004727707300844486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012464957835618407, "epoch": 4.401166180758017, "grad_norm": 0.16904127597808838, "learning_rate": 1e-06, "loss": -0.0563, "step": 459 }, { "clip_ratio/high_max": 0.001470122042519506, "clip_ratio/high_mean": 0.000577422145397577, "clip_ratio/low_mean": 0.0004953699599354877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010727920962381177, "epoch": 4.410495626822158, "grad_norm": 0.14105382561683655, "learning_rate": 1e-06, "loss": -0.0077, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 806.2918701171875, "completions/mean_terminated_length": 582.7955932617188, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 4.419825072886297, "grad_norm": 0.17948602139949799, "learning_rate": 1e-06, "loss": -0.0319, "num_tokens": 275990939.0, "reward": 0.6163504719734192, "reward_std": 0.17113545536994934, "rewards/simpleverify_reward/mean": 0.6163504719734192, "rewards/simpleverify_reward/std": 0.48634201288223267, "step": 461 }, { "clip_ratio/high_max": 0.0019584093715820927, "clip_ratio/high_mean": 0.0007783609744365094, "clip_ratio/low_mean": 0.0005458138257381506, "clip_ratio/low_min": 1.2230920219735708e-05, "clip_ratio/region_mean": 0.0013241747692518402, "epoch": 4.429154518950437, "grad_norm": 0.18692326545715332, "learning_rate": 1e-06, "loss": -0.027, "step": 462 }, { "clip_ratio/high_max": 0.0023554015679110307, "clip_ratio/high_mean": 0.000786347078246763, "clip_ratio/low_mean": 0.0005615844720523455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013479315603035502, "epoch": 4.438483965014577, "grad_norm": 0.18051128089427948, "learning_rate": 1e-06, "loss": -0.0028, "step": 463 }, { "clip_ratio/high_max": 0.0017112216664827429, "clip_ratio/high_mean": 0.0006975450269237626, "clip_ratio/low_mean": 0.0005394989275373518, "clip_ratio/low_min": 3.585085869417526e-05, "clip_ratio/region_mean": 0.0012370439762889873, "epoch": 4.447813411078717, "grad_norm": 0.18073555827140808, "learning_rate": 1e-06, "loss": -0.0074, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0666852678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 823.5042114257812, "completions/mean_terminated_length": 589.6846313476562, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 4.457142857142857, "grad_norm": 0.17046238481998444, "learning_rate": 1e-06, "loss": -0.0231, "num_tokens": 278321378.0, "reward": 0.6414620876312256, "reward_std": 0.1632375568151474, "rewards/simpleverify_reward/mean": 0.6414620280265808, "rewards/simpleverify_reward/std": 0.47963806986808777, "step": 465 }, { "clip_ratio/high_max": 0.0019202635521651246, "clip_ratio/high_mean": 0.0007743567221041303, "clip_ratio/low_mean": 0.00043936525980825536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012137220001022797, "epoch": 4.466472303206997, "grad_norm": 0.1488325297832489, "learning_rate": 1e-06, "loss": -0.0454, "step": 466 }, { "clip_ratio/high_max": 0.0016387407631555106, "clip_ratio/high_mean": 0.0006996799947955878, "clip_ratio/low_mean": 0.00044457677267928375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011442567774793133, "epoch": 4.475801749271137, "grad_norm": 0.1506938636302948, "learning_rate": 1e-06, "loss": -0.0111, "step": 467 }, { "clip_ratio/high_max": 0.0017838676903920714, "clip_ratio/high_mean": 0.0007482338587578852, "clip_ratio/low_mean": 0.0005135449530371261, "clip_ratio/low_min": 1.0738831406342797e-05, "clip_ratio/region_mean": 0.0012617788270290475, "epoch": 4.485131195335277, "grad_norm": 0.1549980193376541, "learning_rate": 1e-06, "loss": -0.0218, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 799.0039672851562, "completions/mean_terminated_length": 570.8108520507812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.494460641399417, "grad_norm": 0.1409902572631836, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 280576216.0, "reward": 0.630859375, "reward_std": 0.1381639540195465, "rewards/simpleverify_reward/mean": 0.630859375, "rewards/simpleverify_reward/std": 0.4826394319534302, "step": 469 }, { "clip_ratio/high_max": 0.0016942859620030504, "clip_ratio/high_mean": 0.0005648482838296331, "clip_ratio/low_mean": 0.00044805930701841135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010129076072189491, "epoch": 4.503790087463557, "grad_norm": 0.14243486523628235, "learning_rate": 1e-06, "loss": -0.0006, "step": 470 }, { "clip_ratio/high_max": 0.0016794254261185415, "clip_ratio/high_mean": 0.0006308445072136237, "clip_ratio/low_mean": 0.0003216693421563832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009525138384560705, "epoch": 4.513119533527696, "grad_norm": 0.1521860808134079, "learning_rate": 1e-06, "loss": -0.034, "step": 471 }, { "clip_ratio/high_max": 0.0017588110677024815, "clip_ratio/high_mean": 0.0005770983661932405, "clip_ratio/low_mean": 0.0004511466131589259, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001028244962071767, "epoch": 4.522448979591837, "grad_norm": 0.15094251930713654, "learning_rate": 1e-06, "loss": 0.0051, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0541294642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 763.6389770507812, "completions/mean_terminated_length": 572.9374389648438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 4.531778425655976, "grad_norm": 0.15289786458015442, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 282861610.0, "reward": 0.6584821939468384, "reward_std": 0.14216618239879608, "rewards/simpleverify_reward/mean": 0.6584821343421936, "rewards/simpleverify_reward/std": 0.47428491711616516, "step": 473 }, { "clip_ratio/high_max": 0.0016475709126098081, "clip_ratio/high_mean": 0.0005964512492937502, "clip_ratio/low_mean": 0.0003697634188029042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009662146658229176, "epoch": 4.541107871720117, "grad_norm": 0.1814448982477188, "learning_rate": 1e-06, "loss": -0.0102, "step": 474 }, { "clip_ratio/high_max": 0.0018393037789792288, "clip_ratio/high_mean": 0.0007131470238164184, "clip_ratio/low_mean": 0.00040759869352768874, "clip_ratio/low_min": 1.0860121619771235e-05, "clip_ratio/region_mean": 0.0011207457064301707, "epoch": 4.550437317784256, "grad_norm": 0.1481669694185257, "learning_rate": 1e-06, "loss": -0.0456, "step": 475 }, { "clip_ratio/high_max": 0.0016047709505073726, "clip_ratio/high_mean": 0.0006100727405282669, "clip_ratio/low_mean": 0.0003834084895970591, "clip_ratio/low_min": 1.1542012543941382e-05, "clip_ratio/region_mean": 0.0009934812296705786, "epoch": 4.559766763848397, "grad_norm": 0.14176668226718903, "learning_rate": 1e-06, "loss": -0.0184, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0599888392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 796.6350708007812, "completions/mean_terminated_length": 586.0789184570312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 4.569096209912536, "grad_norm": 0.16055330634117126, "learning_rate": 1e-06, "loss": 0.017, "num_tokens": 285201246.0, "reward": 0.6319754719734192, "reward_std": 0.1568402349948883, "rewards/simpleverify_reward/mean": 0.6319754719734192, "rewards/simpleverify_reward/std": 0.48233532905578613, "step": 477 }, { "clip_ratio/high_max": 0.001711064724077005, "clip_ratio/high_mean": 0.0006581487386938534, "clip_ratio/low_mean": 0.0004293446745577967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010874934159801342, "epoch": 4.578425655976677, "grad_norm": 0.2273939996957779, "learning_rate": 1e-06, "loss": -0.0434, "step": 478 }, { "clip_ratio/high_max": 0.001937008448294364, "clip_ratio/high_mean": 0.0006951656746423396, "clip_ratio/low_mean": 0.0003929034305656387, "clip_ratio/low_min": 1.2918560969410464e-05, "clip_ratio/region_mean": 0.001088069097022526, "epoch": 4.587755102040816, "grad_norm": 0.16511285305023193, "learning_rate": 1e-06, "loss": -0.0497, "step": 479 }, { "clip_ratio/high_max": 0.001626110497454647, "clip_ratio/high_mean": 0.0006513912117043219, "clip_ratio/low_mean": 0.0005444948112653947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011958860304730479, "epoch": 4.597084548104956, "grad_norm": 0.15174348652362823, "learning_rate": 1e-06, "loss": -0.0169, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0666852678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 806.5734252929688, "completions/mean_terminated_length": 571.5441284179688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.606413994169096, "grad_norm": 0.18017293512821198, "learning_rate": 1e-06, "loss": -0.0347, "num_tokens": 287455629.0, "reward": 0.634765625, "reward_std": 0.16694959998130798, "rewards/simpleverify_reward/mean": 0.634765625, "rewards/simpleverify_reward/std": 0.481563001871109, "step": 481 }, { "clip_ratio/high_max": 0.0016363070026272908, "clip_ratio/high_mean": 0.000620061031440855, "clip_ratio/low_mean": 0.0005493105563800782, "clip_ratio/low_min": 1.1823684872069862e-05, "clip_ratio/region_mean": 0.001169371582363965, "epoch": 4.615743440233236, "grad_norm": 0.16212166845798492, "learning_rate": 1e-06, "loss": -0.0263, "step": 482 }, { "clip_ratio/high_max": 0.0016550270847801585, "clip_ratio/high_mean": 0.000658214765280718, "clip_ratio/low_mean": 0.0005530433027161052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012112580952816643, "epoch": 4.625072886297376, "grad_norm": 0.19823713600635529, "learning_rate": 1e-06, "loss": -0.0305, "step": 483 }, { "clip_ratio/high_max": 0.001748654161929153, "clip_ratio/high_mean": 0.0006819641184847569, "clip_ratio/low_mean": 0.0005694148455859249, "clip_ratio/low_min": 2.4945616132754367e-05, "clip_ratio/region_mean": 0.0012513789406511933, "epoch": 4.634402332361516, "grad_norm": 0.1694829761981964, "learning_rate": 1e-06, "loss": 0.0061, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0675223214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3852.0, "completions/mean_length": 807.7447509765625, "completions/mean_terminated_length": 569.6364135742188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 4.643731778425656, "grad_norm": 0.17225079238414764, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 289713898.0, "reward": 0.6498326063156128, "reward_std": 0.15560904145240784, "rewards/simpleverify_reward/mean": 0.6498326063156128, "rewards/simpleverify_reward/std": 0.4770887792110443, "step": 485 }, { "clip_ratio/high_max": 0.0018281830380146857, "clip_ratio/high_mean": 0.0006607974692087737, "clip_ratio/low_mean": 0.0005245961265245569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011853936048282776, "epoch": 4.653061224489796, "grad_norm": 0.16889061033725739, "learning_rate": 1e-06, "loss": -0.0212, "step": 486 }, { "clip_ratio/high_max": 0.0017574783814779948, "clip_ratio/high_mean": 0.0006251507784327259, "clip_ratio/low_mean": 0.00046523190030711703, "clip_ratio/low_min": 2.171269807149656e-05, "clip_ratio/region_mean": 0.0010903826769208536, "epoch": 4.662390670553936, "grad_norm": 0.15160730481147766, "learning_rate": 1e-06, "loss": -0.057, "step": 487 }, { "clip_ratio/high_max": 0.001700678843917558, "clip_ratio/high_mean": 0.0006102727966208477, "clip_ratio/low_mean": 0.00043213086792093236, "clip_ratio/low_min": 2.847072664735606e-05, "clip_ratio/region_mean": 0.0010424036663607694, "epoch": 4.671720116618076, "grad_norm": 0.1467406451702118, "learning_rate": 1e-06, "loss": -0.0435, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.072265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 846.8736572265625, "completions/mean_terminated_length": 593.7837524414062, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 4.681049562682215, "grad_norm": 0.1671878844499588, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 292067877.0, "reward": 0.5959821939468384, "reward_std": 0.16265968978405, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.4907694160938263, "step": 489 }, { "clip_ratio/high_max": 0.0017800632049329579, "clip_ratio/high_mean": 0.000645394109596964, "clip_ratio/low_mean": 0.0005155562212166842, "clip_ratio/low_min": 1.059501573763555e-05, "clip_ratio/region_mean": 0.0011609503380896058, "epoch": 4.690379008746356, "grad_norm": 0.1820664256811142, "learning_rate": 1e-06, "loss": -0.0243, "step": 490 }, { "clip_ratio/high_max": 0.001451982228900306, "clip_ratio/high_mean": 0.00048603458617435535, "clip_ratio/low_mean": 0.0005943237947576563, "clip_ratio/low_min": 2.7072765078628436e-05, "clip_ratio/region_mean": 0.0010803583863889799, "epoch": 4.699708454810495, "grad_norm": 0.14541809260845184, "learning_rate": 1e-06, "loss": -0.0112, "step": 491 }, { "clip_ratio/high_max": 0.0018098521359206643, "clip_ratio/high_mean": 0.000712790553279774, "clip_ratio/low_mean": 0.0005242238667051424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012370144067972433, "epoch": 4.709037900874636, "grad_norm": 0.1725468635559082, "learning_rate": 1e-06, "loss": -0.0486, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0700334821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3976.0, "completions/mean_length": 824.8197631835938, "completions/mean_terminated_length": 578.4752197265625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 4.718367346938775, "grad_norm": 0.16549234092235565, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 294373039.0, "reward": 0.6015625, "reward_std": 0.15264755487442017, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48964470624923706, "step": 493 }, { "clip_ratio/high_max": 0.002090883848723024, "clip_ratio/high_mean": 0.0008752855937927961, "clip_ratio/low_mean": 0.0004054106766488985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012806962695322, "epoch": 4.727696793002916, "grad_norm": 0.16990157961845398, "learning_rate": 1e-06, "loss": -0.0558, "step": 494 }, { "clip_ratio/high_max": 0.001590513449627906, "clip_ratio/high_mean": 0.0006046717126082513, "clip_ratio/low_mean": 0.0005121511876495788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011168229175382294, "epoch": 4.737026239067055, "grad_norm": 0.15452304482460022, "learning_rate": 1e-06, "loss": 0.0123, "step": 495 }, { "clip_ratio/high_max": 0.0016474544354423415, "clip_ratio/high_mean": 0.0006577183930858155, "clip_ratio/low_mean": 0.0005060788575974584, "clip_ratio/low_min": 3.056159948755521e-05, "clip_ratio/region_mean": 0.0011637972311291378, "epoch": 4.746355685131196, "grad_norm": 0.1446140557527542, "learning_rate": 1e-06, "loss": -0.0128, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0611049107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 784.6869506835938, "completions/mean_terminated_length": 569.1809692382812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 4.755685131195335, "grad_norm": 0.16506457328796387, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 296654589.0, "reward": 0.6629464626312256, "reward_std": 0.15868228673934937, "rewards/simpleverify_reward/mean": 0.6629464030265808, "rewards/simpleverify_reward/std": 0.4727693498134613, "step": 497 }, { "clip_ratio/high_max": 0.0016336544213118032, "clip_ratio/high_mean": 0.000628269662229286, "clip_ratio/low_mean": 0.000538008482180885, "clip_ratio/low_min": 1.716561382636428e-05, "clip_ratio/region_mean": 0.001166278150776634, "epoch": 4.765014577259475, "grad_norm": 0.16749195754528046, "learning_rate": 1e-06, "loss": 0.0048, "step": 498 }, { "clip_ratio/high_max": 0.0016753753006923944, "clip_ratio/high_mean": 0.0006900196549395332, "clip_ratio/low_mean": 0.0004772403244714951, "clip_ratio/low_min": 2.4543333893234376e-05, "clip_ratio/region_mean": 0.0011672599321173038, "epoch": 4.774344023323615, "grad_norm": 0.1594279408454895, "learning_rate": 1e-06, "loss": -0.0284, "step": 499 }, { "clip_ratio/high_max": 0.0019788632926065475, "clip_ratio/high_mean": 0.000834638256492326, "clip_ratio/low_mean": 0.0004795458453372703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013141840972821228, "epoch": 4.783673469387755, "grad_norm": 0.1730535924434662, "learning_rate": 1e-06, "loss": -0.0394, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0711495535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 833.8058471679688, "completions/mean_terminated_length": 583.923095703125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 4.793002915451895, "grad_norm": 0.17397665977478027, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 298962541.0, "reward": 0.6088169813156128, "reward_std": 0.1732492595911026, "rewards/simpleverify_reward/mean": 0.6088169813156128, "rewards/simpleverify_reward/std": 0.4880833327770233, "step": 501 }, { "clip_ratio/high_max": 0.0018363112430961337, "clip_ratio/high_mean": 0.0007852283961256035, "clip_ratio/low_mean": 0.0005944966187598766, "clip_ratio/low_min": 2.053219395747874e-05, "clip_ratio/region_mean": 0.001379725057631731, "epoch": 4.802332361516035, "grad_norm": 0.1594071388244629, "learning_rate": 1e-06, "loss": -0.0451, "step": 502 }, { "clip_ratio/high_max": 0.0024349970335606486, "clip_ratio/high_mean": 0.0007892740850365954, "clip_ratio/low_mean": 0.00047703678501420654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012663108864217065, "epoch": 4.811661807580175, "grad_norm": 0.17455720901489258, "learning_rate": 1e-06, "loss": -0.0142, "step": 503 }, { "clip_ratio/high_max": 0.0017092355683416827, "clip_ratio/high_mean": 0.0006282927934080362, "clip_ratio/low_mean": 0.0005937968144280603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001222089591465192, "epoch": 4.820991253644315, "grad_norm": 0.18474893271923065, "learning_rate": 1e-06, "loss": -0.0265, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3825.0, "completions/mean_length": 848.1721801757812, "completions/mean_terminated_length": 594.1303100585938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 4.830320699708455, "grad_norm": 0.1464848518371582, "learning_rate": 1e-06, "loss": -0.0154, "num_tokens": 301279550.0, "reward": 0.6116071939468384, "reward_std": 0.14221274852752686, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4874527156352997, "step": 505 }, { "clip_ratio/high_max": 0.0014580070674128365, "clip_ratio/high_mean": 0.0005691684300472843, "clip_ratio/low_mean": 0.00047314550192822935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010423139356134925, "epoch": 4.839650145772595, "grad_norm": 0.1745438277721405, "learning_rate": 1e-06, "loss": -0.0225, "step": 506 }, { "clip_ratio/high_max": 0.001409866294125095, "clip_ratio/high_mean": 0.0005082495536044007, "clip_ratio/low_mean": 0.0005924450961174443, "clip_ratio/low_min": 2.4126617063302547e-05, "clip_ratio/region_mean": 0.0011006946479028556, "epoch": 4.848979591836734, "grad_norm": 0.1804339587688446, "learning_rate": 1e-06, "loss": 0.0325, "step": 507 }, { "clip_ratio/high_max": 0.0020154405647190288, "clip_ratio/high_mean": 0.000697391804351355, "clip_ratio/low_mean": 0.0005949333003627544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012923251015308779, "epoch": 4.858309037900875, "grad_norm": 0.14549504220485687, "learning_rate": 1e-06, "loss": -0.0279, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.058314732142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 772.7352294921875, "completions/mean_terminated_length": 566.93896484375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 4.867638483965014, "grad_norm": 0.18545009195804596, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 303558529.0, "reward": 0.645089328289032, "reward_std": 0.14416037499904633, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.47855302691459656, "step": 509 }, { "clip_ratio/high_max": 0.0017520904548291583, "clip_ratio/high_mean": 0.0007147005890146829, "clip_ratio/low_mean": 0.00043912383625865914, "clip_ratio/low_min": 1.8634465959621593e-05, "clip_ratio/region_mean": 0.0011538244289113209, "epoch": 4.876967930029155, "grad_norm": 0.41909289360046387, "learning_rate": 1e-06, "loss": -0.0384, "step": 510 }, { "clip_ratio/high_max": 0.0019077877295785584, "clip_ratio/high_mean": 0.0007473638561350526, "clip_ratio/low_mean": 0.00041528556675984873, "clip_ratio/low_min": 7.082955562509596e-06, "clip_ratio/region_mean": 0.0011626494051597547, "epoch": 4.886297376093294, "grad_norm": 0.2740522027015686, "learning_rate": 1e-06, "loss": -0.0508, "step": 511 }, { "clip_ratio/high_max": 0.0017902710278576706, "clip_ratio/high_mean": 0.0007105464155756636, "clip_ratio/low_mean": 0.0005105611162434798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012211075263621751, "epoch": 4.895626822157435, "grad_norm": 0.1592327505350113, "learning_rate": 1e-06, "loss": 0.0046, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0786830357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 890.0569458007812, "completions/mean_terminated_length": 616.2604370117188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.904956268221574, "grad_norm": 0.16860119998455048, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 305959909.0, "reward": 0.5920759439468384, "reward_std": 0.16322749853134155, "rewards/simpleverify_reward/mean": 0.5920758843421936, "rewards/simpleverify_reward/std": 0.49151745438575745, "step": 513 }, { "clip_ratio/high_max": 0.0017482600887888111, "clip_ratio/high_mean": 0.0007450438024534378, "clip_ratio/low_mean": 0.0004490707851800835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011941146112803835, "epoch": 4.914285714285715, "grad_norm": 0.1644207388162613, "learning_rate": 1e-06, "loss": -0.0654, "step": 514 }, { "clip_ratio/high_max": 0.0017871362651931122, "clip_ratio/high_mean": 0.0006504295524791814, "clip_ratio/low_mean": 0.0004959479956596624, "clip_ratio/low_min": 1.6430072719231248e-05, "clip_ratio/region_mean": 0.0011463775335869286, "epoch": 4.923615160349854, "grad_norm": 0.1818559616804123, "learning_rate": 1e-06, "loss": -0.0107, "step": 515 }, { "clip_ratio/high_max": 0.001849125103035476, "clip_ratio/high_mean": 0.0007588699472762528, "clip_ratio/low_mean": 0.0005431335630419198, "clip_ratio/low_min": 1.9060667909798212e-05, "clip_ratio/region_mean": 0.0013020035330555402, "epoch": 4.932944606413994, "grad_norm": 0.16835439205169678, "learning_rate": 1e-06, "loss": -0.0307, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0627790178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 785.4707641601562, "completions/mean_terminated_length": 563.7174682617188, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 4.942274052478134, "grad_norm": 0.19936411082744598, "learning_rate": 1e-06, "loss": 0.0189, "num_tokens": 308187292.0, "reward": 0.6777344346046448, "reward_std": 0.1579391360282898, "rewards/simpleverify_reward/mean": 0.677734375, "rewards/simpleverify_reward/std": 0.46740928292274475, "step": 517 }, { "clip_ratio/high_max": 0.0017451446656195913, "clip_ratio/high_mean": 0.0006671811424894258, "clip_ratio/low_mean": 0.0005201925719120482, "clip_ratio/low_min": 2.6558548597677145e-05, "clip_ratio/region_mean": 0.0011873736984853167, "epoch": 4.9516034985422746, "grad_norm": 0.16927321255207062, "learning_rate": 1e-06, "loss": -0.0221, "step": 518 }, { "clip_ratio/high_max": 0.0017596554462215863, "clip_ratio/high_mean": 0.0007114917652870645, "clip_ratio/low_mean": 0.0004735882448585471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011850800074171275, "epoch": 4.960932944606414, "grad_norm": 0.17465786635875702, "learning_rate": 1e-06, "loss": -0.0353, "step": 519 }, { "clip_ratio/high_max": 0.0018506885826354846, "clip_ratio/high_mean": 0.0007523213535023388, "clip_ratio/low_mean": 0.000490812831230869, "clip_ratio/low_min": 5.27871980011696e-05, "clip_ratio/region_mean": 0.0012431341892806813, "epoch": 4.970262390670554, "grad_norm": 0.15720783174037933, "learning_rate": 1e-06, "loss": -0.0645, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.083984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 884.3713989257812, "completions/mean_terminated_length": 589.9149780273438, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 5.0093294460641395, "grad_norm": 0.17047230899333954, "learning_rate": 1e-06, "loss": -0.0303, "num_tokens": 310466263.0, "reward": 0.627511203289032, "reward_std": 0.14933177828788757, "rewards/simpleverify_reward/mean": 0.6275111436843872, "rewards/simpleverify_reward/std": 0.4835350513458252, "step": 521 }, { "clip_ratio/high_max": 0.001955667274160078, "clip_ratio/high_mean": 0.0006694280600640923, "clip_ratio/low_mean": 0.0004651347890103352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011345628663548268, "epoch": 5.01865889212828, "grad_norm": 0.15380945801734924, "learning_rate": 1e-06, "loss": -0.0312, "step": 522 }, { "clip_ratio/high_max": 0.0017127370338130277, "clip_ratio/high_mean": 0.0006110355325290584, "clip_ratio/low_mean": 0.00044808946859120624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010591249811113812, "epoch": 5.0279883381924195, "grad_norm": 0.1480766236782074, "learning_rate": 1e-06, "loss": -0.0541, "step": 523 }, { "clip_ratio/high_max": 0.001740496518323198, "clip_ratio/high_mean": 0.0006830871489000856, "clip_ratio/low_mean": 0.0004891702910754248, "clip_ratio/low_min": 1.6386995412176475e-05, "clip_ratio/region_mean": 0.0011722574527084362, "epoch": 5.03731778425656, "grad_norm": 0.1687982976436615, "learning_rate": 1e-06, "loss": -0.0464, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 786.1392822265625, "completions/mean_terminated_length": 572.8218383789062, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 5.0466472303206995, "grad_norm": 0.20013578236103058, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 312730978.0, "reward": 0.656808078289032, "reward_std": 0.1669861227273941, "rewards/simpleverify_reward/mean": 0.6568080186843872, "rewards/simpleverify_reward/std": 0.4748412072658539, "step": 525 }, { "clip_ratio/high_max": 0.0019370762165635824, "clip_ratio/high_mean": 0.0007461931327270577, "clip_ratio/low_mean": 0.0004376541633064335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001183847325592069, "epoch": 5.05597667638484, "grad_norm": 0.1817643642425537, "learning_rate": 1e-06, "loss": -0.0626, "step": 526 }, { "clip_ratio/high_max": 0.0021404393592092674, "clip_ratio/high_mean": 0.0008244173986895476, "clip_ratio/low_mean": 0.0005027399649861763, "clip_ratio/low_min": 1.6191710528801195e-05, "clip_ratio/region_mean": 0.0013271573479869403, "epoch": 5.0653061224489795, "grad_norm": 0.205192431807518, "learning_rate": 1e-06, "loss": -0.0025, "step": 527 }, { "clip_ratio/high_max": 0.002198145415604813, "clip_ratio/high_mean": 0.000832049991004169, "clip_ratio/low_mean": 0.0006733552281730226, "clip_ratio/low_min": 1.4988009752414655e-05, "clip_ratio/region_mean": 0.0015054052310006227, "epoch": 5.07463556851312, "grad_norm": 0.1974824219942093, "learning_rate": 1e-06, "loss": -0.0009, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 805.4827270507812, "completions/mean_terminated_length": 577.7380981445312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 5.0839650145772595, "grad_norm": 0.16723059117794037, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 315033300.0, "reward": 0.6325334906578064, "reward_std": 0.15192611515522003, "rewards/simpleverify_reward/mean": 0.6325334906578064, "rewards/simpleverify_reward/std": 0.48218226432800293, "step": 529 }, { "clip_ratio/high_max": 0.0016032916319090873, "clip_ratio/high_mean": 0.000640082953395904, "clip_ratio/low_mean": 0.000429756568337325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00106983954901807, "epoch": 5.093294460641399, "grad_norm": 0.19978292286396027, "learning_rate": 1e-06, "loss": -0.0463, "step": 530 }, { "clip_ratio/high_max": 0.001967564836377278, "clip_ratio/high_mean": 0.0007006679570622509, "clip_ratio/low_mean": 0.0005326970422174782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001233365008374676, "epoch": 5.1026239067055394, "grad_norm": 0.16195450723171234, "learning_rate": 1e-06, "loss": -0.0026, "step": 531 }, { "clip_ratio/high_max": 0.001773791380401235, "clip_ratio/high_mean": 0.0007210992062027799, "clip_ratio/low_mean": 0.0004256388401699951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011467380427347962, "epoch": 5.111953352769679, "grad_norm": 0.16078218817710876, "learning_rate": 1e-06, "loss": -0.0483, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0638950892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 801.1615600585938, "completions/mean_terminated_length": 576.2679443359375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 5.121282798833819, "grad_norm": 0.16979290544986725, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 317327231.0, "reward": 0.6322544813156128, "reward_std": 0.1420711725950241, "rewards/simpleverify_reward/mean": 0.6322544813156128, "rewards/simpleverify_reward/std": 0.4822588860988617, "step": 533 }, { "clip_ratio/high_max": 0.0015332114126067609, "clip_ratio/high_mean": 0.0005610633215837879, "clip_ratio/low_mean": 0.0005109396361149265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010720029495132621, "epoch": 5.130612244897959, "grad_norm": 0.18899470567703247, "learning_rate": 1e-06, "loss": -0.006, "step": 534 }, { "clip_ratio/high_max": 0.001572541834320873, "clip_ratio/high_mean": 0.0006039745221642079, "clip_ratio/low_mean": 0.00033990416045526217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009438786873943172, "epoch": 5.139941690962099, "grad_norm": 0.17636209726333618, "learning_rate": 1e-06, "loss": -0.0275, "step": 535 }, { "clip_ratio/high_max": 0.0013353214053495321, "clip_ratio/high_mean": 0.000494846821311512, "clip_ratio/low_mean": 0.0004114320390726789, "clip_ratio/low_min": 1.2760310710291378e-05, "clip_ratio/region_mean": 0.0009062788594746962, "epoch": 5.149271137026239, "grad_norm": 0.13246281445026398, "learning_rate": 1e-06, "loss": -0.0515, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0655691964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 832.9512329101562, "completions/mean_terminated_length": 603.9823608398438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 5.158600583090379, "grad_norm": 0.18116618692874908, "learning_rate": 1e-06, "loss": -0.0927, "num_tokens": 319696792.0, "reward": 0.6456473469734192, "reward_std": 0.1588129997253418, "rewards/simpleverify_reward/mean": 0.6456473469734192, "rewards/simpleverify_reward/std": 0.4783834218978882, "step": 537 }, { "clip_ratio/high_max": 0.0015995923022273928, "clip_ratio/high_mean": 0.0006100470818637405, "clip_ratio/low_mean": 0.0004792993604496587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001089346464141272, "epoch": 5.167930029154519, "grad_norm": 0.15331891179084778, "learning_rate": 1e-06, "loss": -0.003, "step": 538 }, { "clip_ratio/high_max": 0.0019568572788557503, "clip_ratio/high_mean": 0.000759633352572564, "clip_ratio/low_mean": 0.0004491568843150162, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012087902396160644, "epoch": 5.1772594752186585, "grad_norm": 0.16631989181041718, "learning_rate": 1e-06, "loss": -0.0363, "step": 539 }, { "clip_ratio/high_max": 0.0014886765129631385, "clip_ratio/high_mean": 0.0005932464737270493, "clip_ratio/low_mean": 0.0005202244815336599, "clip_ratio/low_min": 1.1533493307069875e-05, "clip_ratio/region_mean": 0.0011134709675388876, "epoch": 5.186588921282799, "grad_norm": 0.16802409291267395, "learning_rate": 1e-06, "loss": 0.0113, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 882.8652954101562, "completions/mean_terminated_length": 585.0625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 5.1959183673469385, "grad_norm": 0.20843741297721863, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 321976717.0, "reward": 0.6618303656578064, "reward_std": 0.1619601845741272, "rewards/simpleverify_reward/mean": 0.6618303656578064, "rewards/simpleverify_reward/std": 0.47315263748168945, "step": 541 }, { "clip_ratio/high_max": 0.0020606734324246645, "clip_ratio/high_mean": 0.0007219298186100787, "clip_ratio/low_mean": 0.00044256585351831745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011644956612144597, "epoch": 5.205247813411079, "grad_norm": 0.17509101331233978, "learning_rate": 1e-06, "loss": -0.0413, "step": 542 }, { "clip_ratio/high_max": 0.0020411370969668496, "clip_ratio/high_mean": 0.0006745372238583514, "clip_ratio/low_mean": 0.0005398414778028382, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001214378688018769, "epoch": 5.214577259475218, "grad_norm": 0.3389990031719208, "learning_rate": 1e-06, "loss": -0.0209, "step": 543 }, { "clip_ratio/high_max": 0.0018388565731584094, "clip_ratio/high_mean": 0.0007249668997246772, "clip_ratio/low_mean": 0.000492347087856615, "clip_ratio/low_min": 1.052011430147104e-05, "clip_ratio/region_mean": 0.0012173140203231014, "epoch": 5.223906705539359, "grad_norm": 0.16252154111862183, "learning_rate": 1e-06, "loss": -0.0543, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0797991071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 858.257568359375, "completions/mean_terminated_length": 577.4830322265625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 5.233236151603498, "grad_norm": 0.2006680965423584, "learning_rate": 1e-06, "loss": -0.0952, "num_tokens": 324232696.0, "reward": 0.627511203289032, "reward_std": 0.1555141806602478, "rewards/simpleverify_reward/mean": 0.6275111436843872, "rewards/simpleverify_reward/std": 0.4835350513458252, "step": 545 }, { "clip_ratio/high_max": 0.0020282839541323483, "clip_ratio/high_mean": 0.0007134952829801477, "clip_ratio/low_mean": 0.0004866696444878471, "clip_ratio/low_min": 2.1540581656154245e-05, "clip_ratio/region_mean": 0.0012001649265585002, "epoch": 5.242565597667639, "grad_norm": 0.20588167011737823, "learning_rate": 1e-06, "loss": -0.0049, "step": 546 }, { "clip_ratio/high_max": 0.0016589477600064129, "clip_ratio/high_mean": 0.0005730121538363164, "clip_ratio/low_mean": 0.000672330845191027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012453430026653223, "epoch": 5.251895043731778, "grad_norm": 0.18625015020370483, "learning_rate": 1e-06, "loss": 0.0301, "step": 547 }, { "clip_ratio/high_max": 0.0015911375485302415, "clip_ratio/high_mean": 0.0006710776651743799, "clip_ratio/low_mean": 0.000556720558051893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012277981732040644, "epoch": 5.261224489795918, "grad_norm": 0.15086063742637634, "learning_rate": 1e-06, "loss": -0.0566, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 844.4623413085938, "completions/mean_terminated_length": 583.79052734375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 5.270553935860058, "grad_norm": 0.16636420786380768, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 326531737.0, "reward": 0.6233259439468384, "reward_std": 0.15480685234069824, "rewards/simpleverify_reward/mean": 0.6233258843421936, "rewards/simpleverify_reward/std": 0.48461970686912537, "step": 549 }, { "clip_ratio/high_max": 0.0018381755253358278, "clip_ratio/high_mean": 0.0006138017488410696, "clip_ratio/low_mean": 0.00043640605372274877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010502077948331134, "epoch": 5.279883381924198, "grad_norm": 0.15271244943141937, "learning_rate": 1e-06, "loss": -0.0078, "step": 550 }, { "clip_ratio/high_max": 0.0014226701568986755, "clip_ratio/high_mean": 0.0005932845197094139, "clip_ratio/low_mean": 0.0005550599953494384, "clip_ratio/low_min": 1.258558222616557e-05, "clip_ratio/region_mean": 0.0011483445141493576, "epoch": 5.289212827988338, "grad_norm": 0.1884109377861023, "learning_rate": 1e-06, "loss": -0.0105, "step": 551 }, { "clip_ratio/high_max": 0.002118112995958654, "clip_ratio/high_mean": 0.000746199435525341, "clip_ratio/low_mean": 0.0004646158904506592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012108152877772227, "epoch": 5.298542274052478, "grad_norm": 0.1807921677827835, "learning_rate": 1e-06, "loss": -0.0501, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0705915178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 832.4637451171875, "completions/mean_terminated_length": 584.5878295898438, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 5.307871720116618, "grad_norm": 0.18503239750862122, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 328825207.0, "reward": 0.6208147406578064, "reward_std": 0.14395149052143097, "rewards/simpleverify_reward/mean": 0.6208147406578064, "rewards/simpleverify_reward/std": 0.4852519929409027, "step": 553 }, { "clip_ratio/high_max": 0.001781543393008178, "clip_ratio/high_mean": 0.0006201458709256258, "clip_ratio/low_mean": 0.0004408057166074286, "clip_ratio/low_min": 1.7420425137970597e-05, "clip_ratio/region_mean": 0.0010609515720716445, "epoch": 5.317201166180758, "grad_norm": 0.14983299374580383, "learning_rate": 1e-06, "loss": -0.0363, "step": 554 }, { "clip_ratio/high_max": 0.0015027187982923351, "clip_ratio/high_mean": 0.0006011237092025112, "clip_ratio/low_mean": 0.0005386862349041621, "clip_ratio/low_min": 1.4820962860540021e-05, "clip_ratio/region_mean": 0.0011398099304642528, "epoch": 5.326530612244898, "grad_norm": 0.18694572150707245, "learning_rate": 1e-06, "loss": -0.003, "step": 555 }, { "clip_ratio/high_max": 0.0016934028535615653, "clip_ratio/high_mean": 0.0006510670154966647, "clip_ratio/low_mean": 0.0005705908679374261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012216579161759, "epoch": 5.335860058309038, "grad_norm": 0.15860022604465485, "learning_rate": 1e-06, "loss": -0.039, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 833.1409301757812, "completions/mean_terminated_length": 569.435791015625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 5.345189504373177, "grad_norm": 0.19835641980171204, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 331083752.0, "reward": 0.6116071939468384, "reward_std": 0.1561620533466339, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4874527156352997, "step": 557 }, { "clip_ratio/high_max": 0.0016342291819455568, "clip_ratio/high_mean": 0.0006149912860564655, "clip_ratio/low_mean": 0.00042929897472276934, "clip_ratio/low_min": 1.4966474736866076e-05, "clip_ratio/region_mean": 0.0010442902621434769, "epoch": 5.354518950437318, "grad_norm": 0.17907220125198364, "learning_rate": 1e-06, "loss": -0.0467, "step": 558 }, { "clip_ratio/high_max": 0.001423776957381051, "clip_ratio/high_mean": 0.0005347745100152679, "clip_ratio/low_mean": 0.0005187666929487023, "clip_ratio/low_min": 1.5096617971721571e-05, "clip_ratio/region_mean": 0.0010535411856835708, "epoch": 5.363848396501457, "grad_norm": 0.20485956966876984, "learning_rate": 1e-06, "loss": 0.0096, "step": 559 }, { "clip_ratio/high_max": 0.0018811190020642243, "clip_ratio/high_mean": 0.0006570361420017434, "clip_ratio/low_mean": 0.0004965717116647284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011536078473000089, "epoch": 5.373177842565598, "grad_norm": 0.1854930967092514, "learning_rate": 1e-06, "loss": -0.0434, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0789620535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 862.5728759765625, "completions/mean_terminated_length": 585.365966796875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 5.382507288629737, "grad_norm": 0.1711321324110031, "learning_rate": 1e-06, "loss": -0.0692, "num_tokens": 333388829.0, "reward": 0.6448103189468384, "reward_std": 0.14114131033420563, "rewards/simpleverify_reward/mean": 0.6448102593421936, "rewards/simpleverify_reward/std": 0.47863754630088806, "step": 561 }, { "clip_ratio/high_max": 0.0016683209796610754, "clip_ratio/high_mean": 0.0006413352539311745, "clip_ratio/low_mean": 0.0003861095628963085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010274448122800095, "epoch": 5.391836734693878, "grad_norm": 0.18957296013832092, "learning_rate": 1e-06, "loss": -0.0059, "step": 562 }, { "clip_ratio/high_max": 0.001683412952843355, "clip_ratio/high_mean": 0.0006351347219606396, "clip_ratio/low_mean": 0.0004673253229157126, "clip_ratio/low_min": 1.572722612763755e-05, "clip_ratio/region_mean": 0.001102460049878573, "epoch": 5.401166180758017, "grad_norm": 0.2089579701423645, "learning_rate": 1e-06, "loss": 0.0352, "step": 563 }, { "clip_ratio/high_max": 0.002265447543322807, "clip_ratio/high_mean": 0.0007958085079735611, "clip_ratio/low_mean": 0.0003556897477210441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011514982434164267, "epoch": 5.410495626822158, "grad_norm": 0.153586745262146, "learning_rate": 1e-06, "loss": -0.0537, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.068359375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 815.7098388671875, "completions/mean_terminated_length": 575.0177001953125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 5.419825072886297, "grad_norm": 0.16920828819274902, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 335645189.0, "reward": 0.6554129719734192, "reward_std": 0.13421785831451416, "rewards/simpleverify_reward/mean": 0.6554129719734192, "rewards/simpleverify_reward/std": 0.47529974579811096, "step": 565 }, { "clip_ratio/high_max": 0.0016000844916561618, "clip_ratio/high_mean": 0.0006011554414726561, "clip_ratio/low_mean": 0.0003890909056281089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000990246349829249, "epoch": 5.429154518950437, "grad_norm": 0.1850632131099701, "learning_rate": 1e-06, "loss": -0.0178, "step": 566 }, { "clip_ratio/high_max": 0.0014410002622753382, "clip_ratio/high_mean": 0.0005146989424247295, "clip_ratio/low_mean": 0.00037568848301816615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00089038743499259, "epoch": 5.438483965014577, "grad_norm": 0.13413308560848236, "learning_rate": 1e-06, "loss": -0.0278, "step": 567 }, { "clip_ratio/high_max": 0.0016109980697365245, "clip_ratio/high_mean": 0.0006101308254073956, "clip_ratio/low_mean": 0.000399355597437534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010094864192069508, "epoch": 5.447813411078717, "grad_norm": 0.36132484674453735, "learning_rate": 1e-06, "loss": -0.0141, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 844.4088134765625, "completions/mean_terminated_length": 577.369873046875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 5.457142857142857, "grad_norm": 0.19413873553276062, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 337925398.0, "reward": 0.623604953289032, "reward_std": 0.15734858810901642, "rewards/simpleverify_reward/mean": 0.6236048936843872, "rewards/simpleverify_reward/std": 0.4845485985279083, "step": 569 }, { "clip_ratio/high_max": 0.0019011441800103057, "clip_ratio/high_mean": 0.0007047142717055976, "clip_ratio/low_mean": 0.0005612510203718557, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012659652893489692, "epoch": 5.466472303206997, "grad_norm": 0.17730866372585297, "learning_rate": 1e-06, "loss": -0.0096, "step": 570 }, { "clip_ratio/high_max": 0.001854261354310438, "clip_ratio/high_mean": 0.0007169763266574591, "clip_ratio/low_mean": 0.000560453856451204, "clip_ratio/low_min": 1.3724198652198538e-05, "clip_ratio/region_mean": 0.001277430168556748, "epoch": 5.475801749271137, "grad_norm": 0.21302056312561035, "learning_rate": 1e-06, "loss": -0.0145, "step": 571 }, { "clip_ratio/high_max": 0.0020660388727264944, "clip_ratio/high_mean": 0.0007477424214812345, "clip_ratio/low_mean": 0.0005718158136005513, "clip_ratio/low_min": 4.204546803521225e-05, "clip_ratio/region_mean": 0.0013195582432672381, "epoch": 5.485131195335277, "grad_norm": 0.14659832417964935, "learning_rate": 1e-06, "loss": -0.0476, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0864955357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 871.3898315429688, "completions/mean_terminated_length": 566.0662841796875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 5.494460641399417, "grad_norm": 0.213838130235672, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 340141827.0, "reward": 0.6383928656578064, "reward_std": 0.15588076412677765, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.48053285479545593, "step": 573 }, { "clip_ratio/high_max": 0.0014853468110231915, "clip_ratio/high_mean": 0.0005599576820713992, "clip_ratio/low_mean": 0.0006037944040144794, "clip_ratio/low_min": 4.248208824719768e-05, "clip_ratio/region_mean": 0.0011637521092779934, "epoch": 5.503790087463557, "grad_norm": 0.15161681175231934, "learning_rate": 1e-06, "loss": -0.0169, "step": 574 }, { "clip_ratio/high_max": 0.0015665026767237578, "clip_ratio/high_mean": 0.0006645263219979824, "clip_ratio/low_mean": 0.00042034307443827856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010848693855223246, "epoch": 5.513119533527696, "grad_norm": 0.16119639575481415, "learning_rate": 1e-06, "loss": -0.0524, "step": 575 }, { "clip_ratio/high_max": 0.0019932384966523387, "clip_ratio/high_mean": 0.0007560629019280896, "clip_ratio/low_mean": 0.000436156990872405, "clip_ratio/low_min": 1.4602804185415152e-05, "clip_ratio/region_mean": 0.001192219842778286, "epoch": 5.522448979591837, "grad_norm": 0.19391030073165894, "learning_rate": 1e-06, "loss": -0.0391, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.083984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 880.8582763671875, "completions/mean_terminated_length": 586.0797729492188, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 5.531778425655976, "grad_norm": 0.18205483257770538, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 342455095.0, "reward": 0.619698703289032, "reward_std": 0.15065722167491913, "rewards/simpleverify_reward/mean": 0.6196986436843872, "rewards/simpleverify_reward/std": 0.4855285882949829, "step": 577 }, { "clip_ratio/high_max": 0.0016507245472894283, "clip_ratio/high_mean": 0.0006562711068909266, "clip_ratio/low_mean": 0.0005797248468297767, "clip_ratio/low_min": 2.43253762164386e-05, "clip_ratio/region_mean": 0.0012359959619061556, "epoch": 5.541107871720117, "grad_norm": 0.17395898699760437, "learning_rate": 1e-06, "loss": -0.0283, "step": 578 }, { "clip_ratio/high_max": 0.0020349821061245166, "clip_ratio/high_mean": 0.0008121973569359398, "clip_ratio/low_mean": 0.0004218567082716618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001234054085216485, "epoch": 5.550437317784256, "grad_norm": 0.188312366604805, "learning_rate": 1e-06, "loss": -0.0478, "step": 579 }, { "clip_ratio/high_max": 0.001663971179368673, "clip_ratio/high_mean": 0.0006076005993236322, "clip_ratio/low_mean": 0.0004901666238765756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010977672318404075, "epoch": 5.559766763848397, "grad_norm": 0.1530812829732895, "learning_rate": 1e-06, "loss": -0.0326, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 844.4356079101562, "completions/mean_terminated_length": 573.1442260742188, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 5.569096209912536, "grad_norm": 0.18244686722755432, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 344680544.0, "reward": 0.615792453289032, "reward_std": 0.15627732872962952, "rewards/simpleverify_reward/mean": 0.6157923936843872, "rewards/simpleverify_reward/std": 0.48647522926330566, "step": 581 }, { "clip_ratio/high_max": 0.0015449805232492508, "clip_ratio/high_mean": 0.0005822530583827756, "clip_ratio/low_mean": 0.0004482807917156606, "clip_ratio/low_min": 1.833920214266982e-05, "clip_ratio/region_mean": 0.0010305338491889415, "epoch": 5.578425655976677, "grad_norm": 0.20111502707004547, "learning_rate": 1e-06, "loss": 0.0067, "step": 582 }, { "clip_ratio/high_max": 0.0018104326154571027, "clip_ratio/high_mean": 0.0007156709925766336, "clip_ratio/low_mean": 0.0006186384889588226, "clip_ratio/low_min": 1.369712936138967e-05, "clip_ratio/region_mean": 0.001334309494268382, "epoch": 5.587755102040816, "grad_norm": 0.19932028651237488, "learning_rate": 1e-06, "loss": -0.0335, "step": 583 }, { "clip_ratio/high_max": 0.0019716324386536144, "clip_ratio/high_mean": 0.0007255510445247637, "clip_ratio/low_mean": 0.0005671784983860562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012927295356348623, "epoch": 5.597084548104956, "grad_norm": 0.18363231420516968, "learning_rate": 1e-06, "loss": -0.027, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.072265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 832.6936645507812, "completions/mean_terminated_length": 578.499267578125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.606413994169096, "grad_norm": 0.22223405539989471, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 346954182.0, "reward": 0.6517857313156128, "reward_std": 0.17940276861190796, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47647082805633545, "step": 585 }, { "clip_ratio/high_max": 0.0017914833297254518, "clip_ratio/high_mean": 0.0007438935081154341, "clip_ratio/low_mean": 0.00043349184079488623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011773853766499087, "epoch": 5.615743440233236, "grad_norm": 0.20177073776721954, "learning_rate": 1e-06, "loss": -0.0118, "step": 586 }, { "clip_ratio/high_max": 0.0020517980738077313, "clip_ratio/high_mean": 0.0008486046390316915, "clip_ratio/low_mean": 0.0005989305991533911, "clip_ratio/low_min": 3.537484099069843e-05, "clip_ratio/region_mean": 0.0014475352363660932, "epoch": 5.625072886297376, "grad_norm": 0.20891094207763672, "learning_rate": 1e-06, "loss": -0.0243, "step": 587 }, { "clip_ratio/high_max": 0.0022704226576024666, "clip_ratio/high_mean": 0.0009260783044737764, "clip_ratio/low_mean": 0.0005142547879586346, "clip_ratio/low_min": 1.6013322237995453e-05, "clip_ratio/region_mean": 0.0014403331078938209, "epoch": 5.634402332361516, "grad_norm": 0.20481827855110168, "learning_rate": 1e-06, "loss": -0.0486, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0655691964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 806.849365234375, "completions/mean_terminated_length": 576.0489501953125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.643731778425656, "grad_norm": 0.17292119562625885, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 349233274.0, "reward": 0.6609933376312256, "reward_std": 0.16518807411193848, "rewards/simpleverify_reward/mean": 0.6609932780265808, "rewards/simpleverify_reward/std": 0.4734381437301636, "step": 589 }, { "clip_ratio/high_max": 0.002037296009802958, "clip_ratio/high_mean": 0.0007623148740094621, "clip_ratio/low_mean": 0.0004606584352586651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012229733292770106, "epoch": 5.653061224489796, "grad_norm": 0.17287051677703857, "learning_rate": 1e-06, "loss": -0.0697, "step": 590 }, { "clip_ratio/high_max": 0.002014611363847507, "clip_ratio/high_mean": 0.0007831189377611736, "clip_ratio/low_mean": 0.0005189218591112876, "clip_ratio/low_min": 1.9531249563442543e-05, "clip_ratio/region_mean": 0.0013020407968724612, "epoch": 5.662390670553936, "grad_norm": 0.4022330939769745, "learning_rate": 1e-06, "loss": -0.0234, "step": 591 }, { "clip_ratio/high_max": 0.0016244555554294493, "clip_ratio/high_mean": 0.0006879297070554458, "clip_ratio/low_mean": 0.0005500339875652571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001237963697349187, "epoch": 5.671720116618076, "grad_norm": 0.19127298891544342, "learning_rate": 1e-06, "loss": -0.0218, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0719866071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 814.4601440429688, "completions/mean_terminated_length": 559.9088745117188, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 5.681049562682215, "grad_norm": 0.19455616176128387, "learning_rate": 1e-06, "loss": -0.0214, "num_tokens": 351465867.0, "reward": 0.65625, "reward_std": 0.16461783647537231, "rewards/simpleverify_reward/mean": 0.65625, "rewards/simpleverify_reward/std": 0.47502514719963074, "step": 593 }, { "clip_ratio/high_max": 0.0019496347740641795, "clip_ratio/high_mean": 0.0007430120058415923, "clip_ratio/low_mean": 0.000535668328666361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012786802981281653, "epoch": 5.690379008746356, "grad_norm": 0.1858552247285843, "learning_rate": 1e-06, "loss": -0.0143, "step": 594 }, { "clip_ratio/high_max": 0.002082385999528924, "clip_ratio/high_mean": 0.0008012666467038798, "clip_ratio/low_mean": 0.0004942620180372614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012955286547366995, "epoch": 5.699708454810495, "grad_norm": 0.17814786732196808, "learning_rate": 1e-06, "loss": -0.0645, "step": 595 }, { "clip_ratio/high_max": 0.0018528501896071248, "clip_ratio/high_mean": 0.0007449557724612532, "clip_ratio/low_mean": 0.0005658427626258344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013107984959788155, "epoch": 5.709037900874636, "grad_norm": 0.19466310739517212, "learning_rate": 1e-06, "loss": -0.0319, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0806361607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 849.6428833007812, "completions/mean_terminated_length": 564.9093017578125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 5.718367346938775, "grad_norm": 0.18718212842941284, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 353698779.0, "reward": 0.631417453289032, "reward_std": 0.1659332811832428, "rewards/simpleverify_reward/mean": 0.6314173936843872, "rewards/simpleverify_reward/std": 0.4824877381324768, "step": 597 }, { "clip_ratio/high_max": 0.0021189050130487885, "clip_ratio/high_mean": 0.0006776575555704767, "clip_ratio/low_mean": 0.0005482922842929838, "clip_ratio/low_min": 1.3340448276721872e-05, "clip_ratio/region_mean": 0.00122594981075963, "epoch": 5.727696793002916, "grad_norm": 0.18694739043712616, "learning_rate": 1e-06, "loss": -0.0023, "step": 598 }, { "clip_ratio/high_max": 0.002289648116857279, "clip_ratio/high_mean": 0.000821371568235918, "clip_ratio/low_mean": 0.0005259198433122947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013472914142766967, "epoch": 5.737026239067055, "grad_norm": 0.339325875043869, "learning_rate": 1e-06, "loss": -0.0497, "step": 599 }, { "clip_ratio/high_max": 0.0020793250514543615, "clip_ratio/high_mean": 0.0008713081406312995, "clip_ratio/low_mean": 0.00048785687067720573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013591649840236641, "epoch": 5.746355685131196, "grad_norm": 0.20651331543922424, "learning_rate": 1e-06, "loss": -0.0795, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0728236607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 826.8889770507812, "completions/mean_terminated_length": 570.12158203125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 5.755685131195335, "grad_norm": 0.2097044289112091, "learning_rate": 1e-06, "loss": -0.0132, "num_tokens": 355946253.0, "reward": 0.6515067219734192, "reward_std": 0.15474586188793182, "rewards/simpleverify_reward/mean": 0.6515067219734192, "rewards/simpleverify_reward/std": 0.4765596091747284, "step": 601 }, { "clip_ratio/high_max": 0.0016629277270112652, "clip_ratio/high_mean": 0.0006993716233409941, "clip_ratio/low_mean": 0.00039496780755143845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010943394227069803, "epoch": 5.765014577259475, "grad_norm": 0.19422578811645508, "learning_rate": 1e-06, "loss": -0.0334, "step": 602 }, { "clip_ratio/high_max": 0.0017504096795164514, "clip_ratio/high_mean": 0.0006965141637920169, "clip_ratio/low_mean": 0.00047480702596658375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011713211788446642, "epoch": 5.774344023323615, "grad_norm": 0.20042185485363007, "learning_rate": 1e-06, "loss": -0.0144, "step": 603 }, { "clip_ratio/high_max": 0.0018145676476706285, "clip_ratio/high_mean": 0.0007647844122402603, "clip_ratio/low_mean": 0.0005150871284058667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012798714997188654, "epoch": 5.783673469387755, "grad_norm": 0.19204364717006683, "learning_rate": 1e-06, "loss": -0.0355, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 801.7449951171875, "completions/mean_terminated_length": 567.4255981445312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 5.793002915451895, "grad_norm": 0.18423829972743988, "learning_rate": 1e-06, "loss": -0.0445, "num_tokens": 358213427.0, "reward": 0.6593192219734192, "reward_std": 0.14203426241874695, "rewards/simpleverify_reward/mean": 0.6593192219734192, "rewards/simpleverify_reward/std": 0.4740042984485626, "step": 605 }, { "clip_ratio/high_max": 0.0019589028706832323, "clip_ratio/high_mean": 0.000710647696905653, "clip_ratio/low_mean": 0.0004432571436154831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011539048246049788, "epoch": 5.802332361516035, "grad_norm": 0.17675606906414032, "learning_rate": 1e-06, "loss": -0.0213, "step": 606 }, { "clip_ratio/high_max": 0.001919848316902062, "clip_ratio/high_mean": 0.000605018805799773, "clip_ratio/low_mean": 0.0004781268030455976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010831455929292133, "epoch": 5.811661807580175, "grad_norm": 0.22321327030658722, "learning_rate": 1e-06, "loss": 0.0114, "step": 607 }, { "clip_ratio/high_max": 0.002080896680126898, "clip_ratio/high_mean": 0.000779173758928664, "clip_ratio/low_mean": 0.00042547861994535197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012046523588651326, "epoch": 5.820991253644315, "grad_norm": 0.4969524145126343, "learning_rate": 1e-06, "loss": -0.0386, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 792.935302734375, "completions/mean_terminated_length": 560.1027221679688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 5.830320699708455, "grad_norm": 0.18938985466957092, "learning_rate": 1e-06, "loss": -0.0196, "num_tokens": 360444995.0, "reward": 0.65625, "reward_std": 0.14381837844848633, "rewards/simpleverify_reward/mean": 0.65625, "rewards/simpleverify_reward/std": 0.47502514719963074, "step": 609 }, { "clip_ratio/high_max": 0.001524298593722051, "clip_ratio/high_mean": 0.0005985083435007255, "clip_ratio/low_mean": 0.00036379449511514395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009623028345231432, "epoch": 5.839650145772595, "grad_norm": 0.2879739999771118, "learning_rate": 1e-06, "loss": -0.0478, "step": 610 }, { "clip_ratio/high_max": 0.001523315571830608, "clip_ratio/high_mean": 0.0006729792457917938, "clip_ratio/low_mean": 0.000502141438118997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011751206548069604, "epoch": 5.848979591836734, "grad_norm": 0.6009973883628845, "learning_rate": 1e-06, "loss": -0.0179, "step": 611 }, { "clip_ratio/high_max": 0.0017355095478706062, "clip_ratio/high_mean": 0.0006042955174052622, "clip_ratio/low_mean": 0.0005082190946268383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011125146302219946, "epoch": 5.858309037900875, "grad_norm": 0.16360551118850708, "learning_rate": 1e-06, "loss": -0.0306, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 816.6080322265625, "completions/mean_terminated_length": 564.3470458984375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 5.867638483965014, "grad_norm": 0.21044263243675232, "learning_rate": 1e-06, "loss": -0.0136, "num_tokens": 362681286.0, "reward": 0.626953125, "reward_std": 0.15739361941814423, "rewards/simpleverify_reward/mean": 0.626953125, "rewards/simpleverify_reward/std": 0.48368188738822937, "step": 613 }, { "clip_ratio/high_max": 0.0018042911033262499, "clip_ratio/high_mean": 0.0007082291376718786, "clip_ratio/low_mean": 0.0004894588851129811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011976879941357765, "epoch": 5.876967930029155, "grad_norm": 0.2792727053165436, "learning_rate": 1e-06, "loss": -0.0372, "step": 614 }, { "clip_ratio/high_max": 0.0016625044663669541, "clip_ratio/high_mean": 0.0006735863121321017, "clip_ratio/low_mean": 0.000512330012043094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011859163350891322, "epoch": 5.886297376093294, "grad_norm": 0.18887755274772644, "learning_rate": 1e-06, "loss": -0.0177, "step": 615 }, { "clip_ratio/high_max": 0.002102853570249863, "clip_ratio/high_mean": 0.0007626020660609356, "clip_ratio/low_mean": 0.0005016071204408945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012642092078749556, "epoch": 5.895626822157435, "grad_norm": 0.18160508573055267, "learning_rate": 1e-06, "loss": -0.0223, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 837.1476440429688, "completions/mean_terminated_length": 573.7662963867188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.904956268221574, "grad_norm": 0.181693896651268, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 364951495.0, "reward": 0.6356027126312256, "reward_std": 0.16414853930473328, "rewards/simpleverify_reward/mean": 0.6356026530265808, "rewards/simpleverify_reward/std": 0.48132792115211487, "step": 617 }, { "clip_ratio/high_max": 0.0020826181425945833, "clip_ratio/high_mean": 0.0008691009461472277, "clip_ratio/low_mean": 0.00038438939009211026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012534903398773167, "epoch": 5.914285714285715, "grad_norm": 0.2003132402896881, "learning_rate": 1e-06, "loss": -0.0145, "step": 618 }, { "clip_ratio/high_max": 0.002007332532230066, "clip_ratio/high_mean": 0.0008116581429931102, "clip_ratio/low_mean": 0.0005396271408244502, "clip_ratio/low_min": 2.53336593232234e-05, "clip_ratio/region_mean": 0.0013512853038264439, "epoch": 5.923615160349854, "grad_norm": 0.21043142676353455, "learning_rate": 1e-06, "loss": 0.0069, "step": 619 }, { "clip_ratio/high_max": 0.002099548997648526, "clip_ratio/high_mean": 0.0008514019482390722, "clip_ratio/low_mean": 0.000475009193223741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013264111512398813, "epoch": 5.932944606413994, "grad_norm": 0.1694907397031784, "learning_rate": 1e-06, "loss": -0.0933, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0795200892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3580.0, "completions/mean_length": 854.1113891601562, "completions/mean_terminated_length": 574.045166015625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 5.942274052478134, "grad_norm": 0.18965250253677368, "learning_rate": 1e-06, "loss": -0.0496, "num_tokens": 367215070.0, "reward": 0.6263951063156128, "reward_std": 0.14611202478408813, "rewards/simpleverify_reward/mean": 0.6263951063156128, "rewards/simpleverify_reward/std": 0.48382803797721863, "step": 621 }, { "clip_ratio/high_max": 0.0015953285910654813, "clip_ratio/high_mean": 0.0005693040584446862, "clip_ratio/low_mean": 0.0006223833215699415, "clip_ratio/low_min": 1.7284291971009225e-05, "clip_ratio/region_mean": 0.0011916873772861436, "epoch": 5.9516034985422746, "grad_norm": 0.21745505928993225, "learning_rate": 1e-06, "loss": -0.0198, "step": 622 }, { "clip_ratio/high_max": 0.0017771577295206953, "clip_ratio/high_mean": 0.000617987519945018, "clip_ratio/low_mean": 0.0004934930348099442, "clip_ratio/low_min": 1.0385509995103348e-05, "clip_ratio/region_mean": 0.001111480567487888, "epoch": 5.960932944606414, "grad_norm": 0.17846538126468658, "learning_rate": 1e-06, "loss": -0.0603, "step": 623 }, { "clip_ratio/high_max": 0.0016141903506650124, "clip_ratio/high_mean": 0.0005768920527771115, "clip_ratio/low_mean": 0.0005270603815006325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011039524397347122, "epoch": 5.970262390670554, "grad_norm": 0.17976225912570953, "learning_rate": 1e-06, "loss": -0.0052, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0772879464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 838.2081909179688, "completions/mean_terminated_length": 565.3299560546875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 6.0093294460641395, "grad_norm": 0.180943563580513, "learning_rate": 1e-06, "loss": -0.0148, "num_tokens": 369459536.0, "reward": 0.638671875, "reward_std": 0.15799503028392792, "rewards/simpleverify_reward/mean": 0.638671875, "rewards/simpleverify_reward/std": 0.48045241832733154, "step": 625 }, { "clip_ratio/high_max": 0.0019111113615508657, "clip_ratio/high_mean": 0.0008241054874815745, "clip_ratio/low_mean": 0.00048130419872904895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00130540968166315, "epoch": 6.01865889212828, "grad_norm": 0.16932891309261322, "learning_rate": 1e-06, "loss": -0.066, "step": 626 }, { "clip_ratio/high_max": 0.0017463585645600688, "clip_ratio/high_mean": 0.0006623057433898794, "clip_ratio/low_mean": 0.0005850219404237578, "clip_ratio/low_min": 1.5344954590545967e-05, "clip_ratio/region_mean": 0.0012473276583477855, "epoch": 6.0279883381924195, "grad_norm": 0.18621303141117096, "learning_rate": 1e-06, "loss": -0.0485, "step": 627 }, { "clip_ratio/high_max": 0.0020054941342095844, "clip_ratio/high_mean": 0.000741424207262753, "clip_ratio/low_mean": 0.0005911076168558793, "clip_ratio/low_min": 2.0866985778411618e-05, "clip_ratio/region_mean": 0.0013325318177521694, "epoch": 6.03731778425656, "grad_norm": 0.20130859315395355, "learning_rate": 1e-06, "loss": -0.0288, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0717075892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 837.3390502929688, "completions/mean_terminated_length": 585.6179809570312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.0466472303206995, "grad_norm": 0.15916641056537628, "learning_rate": 1e-06, "loss": -0.0243, "num_tokens": 371758815.0, "reward": 0.654296875, "reward_std": 0.14336808025836945, "rewards/simpleverify_reward/mean": 0.654296875, "rewards/simpleverify_reward/std": 0.47566333413124084, "step": 629 }, { "clip_ratio/high_max": 0.0019978033742518164, "clip_ratio/high_mean": 0.0008109211048576981, "clip_ratio/low_mean": 0.0003858172276522964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011967383543378673, "epoch": 6.05597667638484, "grad_norm": 0.27212151885032654, "learning_rate": 1e-06, "loss": -0.0505, "step": 630 }, { "clip_ratio/high_max": 0.0014448578531300882, "clip_ratio/high_mean": 0.0005721313928006566, "clip_ratio/low_mean": 0.00044613399495574413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010182654041273054, "epoch": 6.0653061224489795, "grad_norm": 0.1675054132938385, "learning_rate": 1e-06, "loss": -0.0183, "step": 631 }, { "clip_ratio/high_max": 0.001640133163164137, "clip_ratio/high_mean": 0.0006688504363410175, "clip_ratio/low_mean": 0.0004371279692350072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011059784155804664, "epoch": 6.07463556851312, "grad_norm": 0.16391079127788544, "learning_rate": 1e-06, "loss": -0.012, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0772879464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3875.0, "completions/mean_length": 827.0315551757812, "completions/mean_terminated_length": 553.2171630859375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.0839650145772595, "grad_norm": 0.2132154405117035, "learning_rate": 1e-06, "loss": -0.0278, "num_tokens": 373955480.0, "reward": 0.6462053656578064, "reward_std": 0.14434394240379333, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.4782131314277649, "step": 633 }, { "clip_ratio/high_max": 0.0017695217866275925, "clip_ratio/high_mean": 0.0006087339961595717, "clip_ratio/low_mean": 0.0005346975567590562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011434315383667126, "epoch": 6.093294460641399, "grad_norm": 0.19545234739780426, "learning_rate": 1e-06, "loss": -0.0232, "step": 634 }, { "clip_ratio/high_max": 0.0018392324855085462, "clip_ratio/high_mean": 0.0006680393216811353, "clip_ratio/low_mean": 0.0005261369410618499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011941762240894604, "epoch": 6.1026239067055394, "grad_norm": 0.17889843881130219, "learning_rate": 1e-06, "loss": -0.0536, "step": 635 }, { "clip_ratio/high_max": 0.0016560599560762057, "clip_ratio/high_mean": 0.0005449154932648526, "clip_ratio/low_mean": 0.00047547072153975023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010203862075286452, "epoch": 6.111953352769679, "grad_norm": 0.15422596037387848, "learning_rate": 1e-06, "loss": -0.0378, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 815.4894409179688, "completions/mean_terminated_length": 546.0755004882812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 6.121282798833819, "grad_norm": 0.22009728848934174, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 376102266.0, "reward": 0.6707589626312256, "reward_std": 0.15314660966396332, "rewards/simpleverify_reward/mean": 0.6707589030265808, "rewards/simpleverify_reward/std": 0.4700031876564026, "step": 637 }, { "clip_ratio/high_max": 0.0019018017483176664, "clip_ratio/high_mean": 0.0007251497627294157, "clip_ratio/low_mean": 0.00038743803179386305, "clip_ratio/low_min": 2.8538812330225483e-05, "clip_ratio/region_mean": 0.0011125878227176145, "epoch": 6.130612244897959, "grad_norm": 0.2109302431344986, "learning_rate": 1e-06, "loss": -0.0476, "step": 638 }, { "clip_ratio/high_max": 0.002016391055803979, "clip_ratio/high_mean": 0.0007509664374083513, "clip_ratio/low_mean": 0.000596598689298844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013475651103362907, "epoch": 6.139941690962099, "grad_norm": 0.23006491363048553, "learning_rate": 1e-06, "loss": -0.0179, "step": 639 }, { "clip_ratio/high_max": 0.0021102485770825297, "clip_ratio/high_mean": 0.0007890428987593623, "clip_ratio/low_mean": 0.00045815737303200876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00124720025996794, "epoch": 6.149271137026239, "grad_norm": 11.751675605773926, "learning_rate": 1e-06, "loss": -0.0556, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0756138392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 843.1788940429688, "completions/mean_terminated_length": 577.1014404296875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.158600583090379, "grad_norm": 0.20077800750732422, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 378371027.0, "reward": 0.645089328289032, "reward_std": 0.1514761745929718, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.47855302691459656, "step": 641 }, { "clip_ratio/high_max": 0.0018093725557264406, "clip_ratio/high_mean": 0.0006465406604547752, "clip_ratio/low_mean": 0.0004882820039711078, "clip_ratio/low_min": 1.3895064512325916e-05, "clip_ratio/region_mean": 0.0011348226835252717, "epoch": 6.167930029154519, "grad_norm": 0.24738873541355133, "learning_rate": 1e-06, "loss": -0.0075, "step": 642 }, { "clip_ratio/high_max": 0.0019450786094239447, "clip_ratio/high_mean": 0.0006936511381354649, "clip_ratio/low_mean": 0.0005324054491211427, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012260565636097454, "epoch": 6.1772594752186585, "grad_norm": 0.20359836518764496, "learning_rate": 1e-06, "loss": -0.0117, "step": 643 }, { "clip_ratio/high_max": 0.002001240172830876, "clip_ratio/high_mean": 0.0007106064185791183, "clip_ratio/low_mean": 0.00044160690026728844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011522133318067063, "epoch": 6.186588921282799, "grad_norm": 0.16259709000587463, "learning_rate": 1e-06, "loss": -0.071, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0739397321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 821.9088134765625, "completions/mean_terminated_length": 560.4944458007812, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.1959183673469385, "grad_norm": 0.21361172199249268, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 380583108.0, "reward": 0.6417410969734192, "reward_std": 0.1375633329153061, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.4795556962490082, "step": 645 }, { "clip_ratio/high_max": 0.0017254963640880305, "clip_ratio/high_mean": 0.0006217411664692918, "clip_ratio/low_mean": 0.0004720675610769831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010938087434624322, "epoch": 6.205247813411079, "grad_norm": 0.19000041484832764, "learning_rate": 1e-06, "loss": -0.0106, "step": 646 }, { "clip_ratio/high_max": 0.001678241416811943, "clip_ratio/high_mean": 0.0006433825528802117, "clip_ratio/low_mean": 0.0004639452008632361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011073277528339531, "epoch": 6.214577259475218, "grad_norm": 0.2536756694316864, "learning_rate": 1e-06, "loss": -0.0291, "step": 647 }, { "clip_ratio/high_max": 0.0014916392210579943, "clip_ratio/high_mean": 0.000654184437735239, "clip_ratio/low_mean": 0.0004817107310373103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001135895126935793, "epoch": 6.223906705539359, "grad_norm": 0.1747714728116989, "learning_rate": 1e-06, "loss": -0.0387, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 864.1604614257812, "completions/mean_terminated_length": 581.7666625976562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.233236151603498, "grad_norm": 0.2180013656616211, "learning_rate": 1e-06, "loss": -0.0234, "num_tokens": 382860099.0, "reward": 0.6336495876312256, "reward_std": 0.14166535437107086, "rewards/simpleverify_reward/mean": 0.6336495280265808, "rewards/simpleverify_reward/std": 0.4818740487098694, "step": 649 }, { "clip_ratio/high_max": 0.0016542827852390474, "clip_ratio/high_mean": 0.0006372446687237243, "clip_ratio/low_mean": 0.00040699134524402325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010442360380693572, "epoch": 6.242565597667639, "grad_norm": 0.20060643553733826, "learning_rate": 1e-06, "loss": -0.0652, "step": 650 }, { "clip_ratio/high_max": 0.0018010023013630416, "clip_ratio/high_mean": 0.0006616992995986948, "clip_ratio/low_mean": 0.0003899089779224596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010516082656977233, "epoch": 6.251895043731778, "grad_norm": 0.15634788572788239, "learning_rate": 1e-06, "loss": -0.056, "step": 651 }, { "clip_ratio/high_max": 0.0017703866797091905, "clip_ratio/high_mean": 0.0006805026278016157, "clip_ratio/low_mean": 0.0005054153243690962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001185917943075765, "epoch": 6.261224489795918, "grad_norm": 0.16988395154476166, "learning_rate": 1e-06, "loss": 0.0036, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.072265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3522.0, "completions/mean_length": 824.581787109375, "completions/mean_terminated_length": 569.7554931640625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.270553935860058, "grad_norm": 0.19914013147354126, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 385124448.0, "reward": 0.662109375, "reward_std": 0.14276023209095, "rewards/simpleverify_reward/mean": 0.662109375, "rewards/simpleverify_reward/std": 0.4730570614337921, "step": 653 }, { "clip_ratio/high_max": 0.0015155960099946242, "clip_ratio/high_mean": 0.0006415695352188777, "clip_ratio/low_mean": 0.0004885129683316336, "clip_ratio/low_min": 1.5656312825740315e-05, "clip_ratio/region_mean": 0.0011300824917270802, "epoch": 6.279883381924198, "grad_norm": 0.15262603759765625, "learning_rate": 1e-06, "loss": -0.0402, "step": 654 }, { "clip_ratio/high_max": 0.001792514984117588, "clip_ratio/high_mean": 0.0006467905004683416, "clip_ratio/low_mean": 0.0004672713571380882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011140618335048202, "epoch": 6.289212827988338, "grad_norm": 0.24154537916183472, "learning_rate": 1e-06, "loss": 0.0038, "step": 655 }, { "clip_ratio/high_max": 0.001680771223618649, "clip_ratio/high_mean": 0.0006721687150275102, "clip_ratio/low_mean": 0.0005189267676541931, "clip_ratio/low_min": 1.0798203220474534e-05, "clip_ratio/region_mean": 0.0011910954854101874, "epoch": 6.298542274052478, "grad_norm": 0.19271902740001678, "learning_rate": 1e-06, "loss": -0.0202, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.083984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 868.6490478515625, "completions/mean_terminated_length": 572.7510986328125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.307871720116618, "grad_norm": 0.19102539122104645, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 387362046.0, "reward": 0.6467634439468384, "reward_std": 0.14754925668239594, "rewards/simpleverify_reward/mean": 0.6467633843421936, "rewards/simpleverify_reward/std": 0.4780421257019043, "step": 657 }, { "clip_ratio/high_max": 0.0015077910975378472, "clip_ratio/high_mean": 0.0005448483316286001, "clip_ratio/low_mean": 0.0004979889699825435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010428373207105324, "epoch": 6.317201166180758, "grad_norm": 0.2001597285270691, "learning_rate": 1e-06, "loss": -0.0121, "step": 658 }, { "clip_ratio/high_max": 0.0016795138581073843, "clip_ratio/high_mean": 0.0006460722761403304, "clip_ratio/low_mean": 0.0005056716308899922, "clip_ratio/low_min": 1.539029835839756e-05, "clip_ratio/region_mean": 0.0011517439015733544, "epoch": 6.326530612244898, "grad_norm": 0.16916713118553162, "learning_rate": 1e-06, "loss": -0.0171, "step": 659 }, { "clip_ratio/high_max": 0.0015090197484823875, "clip_ratio/high_mean": 0.0006242363915589522, "clip_ratio/low_mean": 0.0004678622508436092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010920986169367097, "epoch": 6.335860058309038, "grad_norm": 0.15516625344753265, "learning_rate": 1e-06, "loss": -0.0337, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.080078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 871.0572509765625, "completions/mean_terminated_length": 590.3297119140625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 6.345189504373177, "grad_norm": 0.2225402593612671, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 389668867.0, "reward": 0.647042453289032, "reward_std": 0.15738600492477417, "rewards/simpleverify_reward/mean": 0.6470423936843872, "rewards/simpleverify_reward/std": 0.4779563546180725, "step": 661 }, { "clip_ratio/high_max": 0.0018748947841231711, "clip_ratio/high_mean": 0.0007284926250576973, "clip_ratio/low_mean": 0.0004653870773836388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011938796960748732, "epoch": 6.354518950437318, "grad_norm": 0.1916041523218155, "learning_rate": 1e-06, "loss": -0.0333, "step": 662 }, { "clip_ratio/high_max": 0.0016897407767828554, "clip_ratio/high_mean": 0.0006873527181596728, "clip_ratio/low_mean": 0.0005372693540266482, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001224622090376215, "epoch": 6.363848396501457, "grad_norm": 0.21988339722156525, "learning_rate": 1e-06, "loss": -0.0406, "step": 663 }, { "clip_ratio/high_max": 0.001975298931938596, "clip_ratio/high_mean": 0.0007808620448486181, "clip_ratio/low_mean": 0.00044298864258962567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012238506969879381, "epoch": 6.373177842565598, "grad_norm": 0.1673637181520462, "learning_rate": 1e-06, "loss": -0.0358, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.080078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 844.1975708007812, "completions/mean_terminated_length": 561.1319580078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.382507288629737, "grad_norm": 0.24774032831192017, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 391880991.0, "reward": 0.6637834906578064, "reward_std": 0.14746694266796112, "rewards/simpleverify_reward/mean": 0.6637834906578064, "rewards/simpleverify_reward/std": 0.4724798798561096, "step": 665 }, { "clip_ratio/high_max": 0.0018417991850583348, "clip_ratio/high_mean": 0.0006980487560213078, "clip_ratio/low_mean": 0.000498709401654196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011967581704084296, "epoch": 6.391836734693878, "grad_norm": 0.22237743437290192, "learning_rate": 1e-06, "loss": -0.025, "step": 666 }, { "clip_ratio/high_max": 0.0018021965988737065, "clip_ratio/high_mean": 0.0006544726657011779, "clip_ratio/low_mean": 0.0004853781247220468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011398507976991823, "epoch": 6.401166180758017, "grad_norm": 0.21763181686401367, "learning_rate": 1e-06, "loss": -0.0342, "step": 667 }, { "clip_ratio/high_max": 0.00155338420154294, "clip_ratio/high_mean": 0.0006823031617386732, "clip_ratio/low_mean": 0.00042829306175917736, "clip_ratio/low_min": 1.4019739865034353e-05, "clip_ratio/region_mean": 0.0011105962039437145, "epoch": 6.410495626822158, "grad_norm": 0.20646756887435913, "learning_rate": 1e-06, "loss": -0.0342, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0817522321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 877.3275756835938, "completions/mean_terminated_length": 590.7669067382812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 6.419825072886297, "grad_norm": 0.2248879224061966, "learning_rate": 1e-06, "loss": -0.0232, "num_tokens": 394172885.0, "reward": 0.6456473469734192, "reward_std": 0.15996688604354858, "rewards/simpleverify_reward/mean": 0.6456473469734192, "rewards/simpleverify_reward/std": 0.4783834218978882, "step": 669 }, { "clip_ratio/high_max": 0.0016958152918959968, "clip_ratio/high_mean": 0.0005900728738197358, "clip_ratio/low_mean": 0.0004904292691207957, "clip_ratio/low_min": 1.5707464626757428e-05, "clip_ratio/region_mean": 0.001080502122931648, "epoch": 6.429154518950437, "grad_norm": 0.1845603585243225, "learning_rate": 1e-06, "loss": -0.0064, "step": 670 }, { "clip_ratio/high_max": 0.0019418751289776992, "clip_ratio/high_mean": 0.0007124814346752828, "clip_ratio/low_mean": 0.000418040090607974, "clip_ratio/low_min": 3.099082641710993e-05, "clip_ratio/region_mean": 0.0011305215375614353, "epoch": 6.438483965014577, "grad_norm": 0.19204656779766083, "learning_rate": 1e-06, "loss": -0.0496, "step": 671 }, { "clip_ratio/high_max": 0.0022493279684567824, "clip_ratio/high_mean": 0.0007983937757671811, "clip_ratio/low_mean": 0.0004772635948029347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012756573596561793, "epoch": 6.447813411078717, "grad_norm": 0.18901929259300232, "learning_rate": 1e-06, "loss": -0.0292, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0694754464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 810.593505859375, "completions/mean_terminated_length": 565.2962646484375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 6.457142857142857, "grad_norm": 0.21581843495368958, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 396410868.0, "reward": 0.6657366156578064, "reward_std": 0.14741115272045135, "rewards/simpleverify_reward/mean": 0.6657366156578064, "rewards/simpleverify_reward/std": 0.4717981517314911, "step": 673 }, { "clip_ratio/high_max": 0.0019230346661061049, "clip_ratio/high_mean": 0.0007148980312194908, "clip_ratio/low_mean": 0.0005448466426969389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012597446620929986, "epoch": 6.466472303206997, "grad_norm": 0.1966102570295334, "learning_rate": 1e-06, "loss": 0.0217, "step": 674 }, { "clip_ratio/high_max": 0.002173857061279705, "clip_ratio/high_mean": 0.0007900414493633434, "clip_ratio/low_mean": 0.0003965330542996526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011865745254908688, "epoch": 6.475801749271137, "grad_norm": 0.1834590882062912, "learning_rate": 1e-06, "loss": -0.0465, "step": 675 }, { "clip_ratio/high_max": 0.001623847634618869, "clip_ratio/high_mean": 0.0007225248191389255, "clip_ratio/low_mean": 0.000422624663769966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011451495047367644, "epoch": 6.485131195335277, "grad_norm": 0.17319194972515106, "learning_rate": 1e-06, "loss": -0.0283, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 914.7176513671875, "completions/mean_terminated_length": 568.2413330078125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.494460641399417, "grad_norm": 0.19094020128250122, "learning_rate": 1e-06, "loss": -0.0105, "num_tokens": 398624800.0, "reward": 0.630859375, "reward_std": 0.14885348081588745, "rewards/simpleverify_reward/mean": 0.630859375, "rewards/simpleverify_reward/std": 0.4826394319534302, "step": 677 }, { "clip_ratio/high_max": 0.001881691554444842, "clip_ratio/high_mean": 0.0007484051020583138, "clip_ratio/low_mean": 0.00044781656924897106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011962216449319385, "epoch": 6.503790087463557, "grad_norm": 0.1850719302892685, "learning_rate": 1e-06, "loss": -0.0595, "step": 678 }, { "clip_ratio/high_max": 0.0018747696085483767, "clip_ratio/high_mean": 0.0006893331283208681, "clip_ratio/low_mean": 0.0004583078116411343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011476409163151402, "epoch": 6.513119533527696, "grad_norm": 0.1884315460920334, "learning_rate": 1e-06, "loss": -0.0401, "step": 679 }, { "clip_ratio/high_max": 0.0019821650384983514, "clip_ratio/high_mean": 0.0007736466868664138, "clip_ratio/low_mean": 0.000367193008059985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011408396967453882, "epoch": 6.522448979591837, "grad_norm": 0.17585389316082, "learning_rate": 1e-06, "loss": -0.051, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0767299107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 823.3345947265625, "completions/mean_terminated_length": 551.3541870117188, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 6.531778425655976, "grad_norm": 0.17501617968082428, "learning_rate": 1e-06, "loss": -0.0777, "num_tokens": 400788583.0, "reward": 0.674386203289032, "reward_std": 0.12206340581178665, "rewards/simpleverify_reward/mean": 0.6743861436843872, "rewards/simpleverify_reward/std": 0.4686691164970398, "step": 681 }, { "clip_ratio/high_max": 0.0020343566575320438, "clip_ratio/high_mean": 0.0007003311129665235, "clip_ratio/low_mean": 0.00029494082605197036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009952719174179947, "epoch": 6.541107871720117, "grad_norm": 0.17865709960460663, "learning_rate": 1e-06, "loss": -0.0622, "step": 682 }, { "clip_ratio/high_max": 0.0013081465112918522, "clip_ratio/high_mean": 0.00044355563704812084, "clip_ratio/low_mean": 0.00037754168261017185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008210972937376937, "epoch": 6.550437317784256, "grad_norm": 0.14851190149784088, "learning_rate": 1e-06, "loss": -0.0008, "step": 683 }, { "clip_ratio/high_max": 0.001638154066313291, "clip_ratio/high_mean": 0.0005351824356694124, "clip_ratio/low_mean": 0.0004173257893853588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009525082350592129, "epoch": 6.559766763848397, "grad_norm": 0.1799805611371994, "learning_rate": 1e-06, "loss": 0.0121, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 900.0469360351562, "completions/mean_terminated_length": 586.7181396484375, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 6.569096209912536, "grad_norm": 0.2008001208305359, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 403039743.0, "reward": 0.6442522406578064, "reward_std": 0.14997920393943787, "rewards/simpleverify_reward/mean": 0.6442522406578064, "rewards/simpleverify_reward/std": 0.4788060784339905, "step": 685 }, { "clip_ratio/high_max": 0.00179415347520262, "clip_ratio/high_mean": 0.0006383218733390095, "clip_ratio/low_mean": 0.000445042412593466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010833643063961063, "epoch": 6.578425655976677, "grad_norm": 0.2055046558380127, "learning_rate": 1e-06, "loss": -0.0438, "step": 686 }, { "clip_ratio/high_max": 0.0018635420128703117, "clip_ratio/high_mean": 0.0006467426937888376, "clip_ratio/low_mean": 0.0004333778133513988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010801204916788265, "epoch": 6.587755102040816, "grad_norm": 0.19540689885616302, "learning_rate": 1e-06, "loss": 0.0094, "step": 687 }, { "clip_ratio/high_max": 0.001631882867513923, "clip_ratio/high_mean": 0.0006623790777666727, "clip_ratio/low_mean": 0.00039637817098991945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00105875722510973, "epoch": 6.597084548104956, "grad_norm": 0.15322351455688477, "learning_rate": 1e-06, "loss": -0.0606, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 837.6593627929688, "completions/mean_terminated_length": 546.4884033203125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 6.606413994169096, "grad_norm": 0.2096838653087616, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 405208298.0, "reward": 0.6492745876312256, "reward_std": 0.15717659890651703, "rewards/simpleverify_reward/mean": 0.6492745280265808, "rewards/simpleverify_reward/std": 0.47726374864578247, "step": 689 }, { "clip_ratio/high_max": 0.002003064117161557, "clip_ratio/high_mean": 0.0007813471165718511, "clip_ratio/low_mean": 0.0004949123267579125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001276259408768965, "epoch": 6.615743440233236, "grad_norm": 0.18902616202831268, "learning_rate": 1e-06, "loss": -0.0589, "step": 690 }, { "clip_ratio/high_max": 0.0016377372958231717, "clip_ratio/high_mean": 0.0006486062820840743, "clip_ratio/low_mean": 0.000528112855135987, "clip_ratio/low_min": 4.5247938942338806e-05, "clip_ratio/region_mean": 0.0011767191390390508, "epoch": 6.625072886297376, "grad_norm": 0.2456071674823761, "learning_rate": 1e-06, "loss": -0.0143, "step": 691 }, { "clip_ratio/high_max": 0.0021963772669550963, "clip_ratio/high_mean": 0.0008106098975986242, "clip_ratio/low_mean": 0.0006367460082401522, "clip_ratio/low_min": 2.705199585761875e-05, "clip_ratio/region_mean": 0.001447355913114734, "epoch": 6.634402332361516, "grad_norm": 0.20757293701171875, "learning_rate": 1e-06, "loss": -0.0273, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0923549107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 887.820068359375, "completions/mean_terminated_length": 561.3805541992188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.643731778425656, "grad_norm": 0.20216889679431915, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 407404821.0, "reward": 0.6233259439468384, "reward_std": 0.13665281236171722, "rewards/simpleverify_reward/mean": 0.6233258843421936, "rewards/simpleverify_reward/std": 0.484619677066803, "step": 693 }, { "clip_ratio/high_max": 0.0018329286722291727, "clip_ratio/high_mean": 0.000612566343988874, "clip_ratio/low_mean": 0.00036259853732190095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009751648467499763, "epoch": 6.653061224489796, "grad_norm": 0.219848170876503, "learning_rate": 1e-06, "loss": -0.0479, "step": 694 }, { "clip_ratio/high_max": 0.0015159104150370695, "clip_ratio/high_mean": 0.0005945071061432827, "clip_ratio/low_mean": 0.000426095187322062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010206022889178712, "epoch": 6.662390670553936, "grad_norm": 0.1805429607629776, "learning_rate": 1e-06, "loss": -0.0632, "step": 695 }, { "clip_ratio/high_max": 0.0018989177697221749, "clip_ratio/high_mean": 0.0006635959316554363, "clip_ratio/low_mean": 0.00039761355128575815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010612094756652368, "epoch": 6.671720116618076, "grad_norm": 0.16476891934871674, "learning_rate": 1e-06, "loss": -0.0307, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.087890625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 885.5938110351562, "completions/mean_terminated_length": 576.2398681640625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.681049562682215, "grad_norm": 0.17955568432807922, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 409643365.0, "reward": 0.635323703289032, "reward_std": 0.14519838988780975, "rewards/simpleverify_reward/mean": 0.6353236436843872, "rewards/simpleverify_reward/std": 0.48140645027160645, "step": 697 }, { "clip_ratio/high_max": 0.0018984113885380793, "clip_ratio/high_mean": 0.000720341682608705, "clip_ratio/low_mean": 0.0004642996682377998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011846413253806531, "epoch": 6.690379008746356, "grad_norm": 0.18967975676059723, "learning_rate": 1e-06, "loss": -0.0249, "step": 698 }, { "clip_ratio/high_max": 0.0017850211297627538, "clip_ratio/high_mean": 0.0006796514817324351, "clip_ratio/low_mean": 0.00035860798561770935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010382594646216603, "epoch": 6.699708454810495, "grad_norm": 0.17434947192668915, "learning_rate": 1e-06, "loss": -0.0483, "step": 699 }, { "clip_ratio/high_max": 0.0016480593112646602, "clip_ratio/high_mean": 0.0006639448947680648, "clip_ratio/low_mean": 0.000420415624830639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010843605123227462, "epoch": 6.709037900874636, "grad_norm": 0.1805799901485443, "learning_rate": 1e-06, "loss": -0.0566, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 819.0831909179688, "completions/mean_terminated_length": 556.376708984375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.718367346938775, "grad_norm": 0.19341816008090973, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 411860919.0, "reward": 0.6501116156578064, "reward_std": 0.1439398229122162, "rewards/simpleverify_reward/mean": 0.6501116156578064, "rewards/simpleverify_reward/std": 0.47700104117393494, "step": 701 }, { "clip_ratio/high_max": 0.001703212910797447, "clip_ratio/high_mean": 0.0006290100445767166, "clip_ratio/low_mean": 0.0003708048984663037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000999814954411704, "epoch": 6.727696793002916, "grad_norm": 0.20410582423210144, "learning_rate": 1e-06, "loss": -0.0314, "step": 702 }, { "clip_ratio/high_max": 0.0015942964018904604, "clip_ratio/high_mean": 0.000552623137991759, "clip_ratio/low_mean": 0.0005183341395422758, "clip_ratio/low_min": 5.9580554079730064e-05, "clip_ratio/region_mean": 0.0010709572670748457, "epoch": 6.737026239067055, "grad_norm": 0.2757868766784668, "learning_rate": 1e-06, "loss": -0.0008, "step": 703 }, { "clip_ratio/high_max": 0.0016764399915700778, "clip_ratio/high_mean": 0.0005993985059831175, "clip_ratio/low_mean": 0.0005170372112388577, "clip_ratio/low_min": 1.5628907931386493e-05, "clip_ratio/region_mean": 0.0011164357056259178, "epoch": 6.746355685131196, "grad_norm": 0.18425646424293518, "learning_rate": 1e-06, "loss": -0.0254, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 815.4955444335938, "completions/mean_terminated_length": 571.62109375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 6.755685131195335, "grad_norm": 0.20383980870246887, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 414109911.0, "reward": 0.6590402126312256, "reward_std": 0.14694827795028687, "rewards/simpleverify_reward/mean": 0.6590401530265808, "rewards/simpleverify_reward/std": 0.47409799695014954, "step": 705 }, { "clip_ratio/high_max": 0.001700190274277702, "clip_ratio/high_mean": 0.0006360341467370745, "clip_ratio/low_mean": 0.0004943805106449872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001130414650106104, "epoch": 6.765014577259475, "grad_norm": 0.18222685158252716, "learning_rate": 1e-06, "loss": -0.0088, "step": 706 }, { "clip_ratio/high_max": 0.001966163174074609, "clip_ratio/high_mean": 0.0007385770131804748, "clip_ratio/low_mean": 0.0004963445514931664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012349215776339406, "epoch": 6.774344023323615, "grad_norm": 0.17962031066417694, "learning_rate": 1e-06, "loss": -0.0295, "step": 707 }, { "clip_ratio/high_max": 0.0021044925451860763, "clip_ratio/high_mean": 0.0007540449969383189, "clip_ratio/low_mean": 0.0004858447446167702, "clip_ratio/low_min": 1.0340833796362858e-05, "clip_ratio/region_mean": 0.001239889745193068, "epoch": 6.783673469387755, "grad_norm": 0.177434042096138, "learning_rate": 1e-06, "loss": -0.0556, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0851004464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 894.816162109375, "completions/mean_terminated_length": 597.0543212890625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 6.793002915451895, "grad_norm": 0.20444990694522858, "learning_rate": 1e-06, "loss": -0.0922, "num_tokens": 416399972.0, "reward": 0.619698703289032, "reward_std": 0.17113712430000305, "rewards/simpleverify_reward/mean": 0.6196986436843872, "rewards/simpleverify_reward/std": 0.4855285882949829, "step": 709 }, { "clip_ratio/high_max": 0.0017304295506619383, "clip_ratio/high_mean": 0.0006968831967242295, "clip_ratio/low_mean": 0.00048700768843445985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011838908758363687, "epoch": 6.802332361516035, "grad_norm": 0.16899698972702026, "learning_rate": 1e-06, "loss": -0.0514, "step": 710 }, { "clip_ratio/high_max": 0.0017209572470164858, "clip_ratio/high_mean": 0.0007124189123715041, "clip_ratio/low_mean": 0.0006884935010020854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014009124206495471, "epoch": 6.811661807580175, "grad_norm": 0.19939962029457092, "learning_rate": 1e-06, "loss": -0.01, "step": 711 }, { "clip_ratio/high_max": 0.002177388989366591, "clip_ratio/high_mean": 0.0008493429686495801, "clip_ratio/low_mean": 0.0005763029357694904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014256459144235123, "epoch": 6.820991253644315, "grad_norm": 0.2555806636810303, "learning_rate": 1e-06, "loss": -0.0228, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.099609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 923.3731079101562, "completions/mean_terminated_length": 572.3882446289062, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 6.830320699708455, "grad_norm": 0.19286246597766876, "learning_rate": 1e-06, "loss": -0.0269, "num_tokens": 418616253.0, "reward": 0.6325334906578064, "reward_std": 0.14845041930675507, "rewards/simpleverify_reward/mean": 0.6325334906578064, "rewards/simpleverify_reward/std": 0.48218226432800293, "step": 713 }, { "clip_ratio/high_max": 0.001752030450006714, "clip_ratio/high_mean": 0.0006948388709133724, "clip_ratio/low_mean": 0.00044091109612054424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011357499715813901, "epoch": 6.839650145772595, "grad_norm": 0.16098390519618988, "learning_rate": 1e-06, "loss": -0.0484, "step": 714 }, { "clip_ratio/high_max": 0.0019233385646657553, "clip_ratio/high_mean": 0.000713847519364208, "clip_ratio/low_mean": 0.0004140176570217591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011278651745669777, "epoch": 6.848979591836734, "grad_norm": 0.21482400596141815, "learning_rate": 1e-06, "loss": -0.0553, "step": 715 }, { "clip_ratio/high_max": 0.0019257325402577408, "clip_ratio/high_mean": 0.0006473211651609745, "clip_ratio/low_mean": 0.0005076603329143836, "clip_ratio/low_min": 3.637951158452779e-05, "clip_ratio/region_mean": 0.0011549815062608104, "epoch": 6.858309037900875, "grad_norm": 0.36466747522354126, "learning_rate": 1e-06, "loss": -0.0238, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0731026785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 840.6671752929688, "completions/mean_terminated_length": 583.925048828125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.867638483965014, "grad_norm": 0.20603623986244202, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 420949060.0, "reward": 0.6428571939468384, "reward_std": 0.13177546858787537, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4792242646217346, "step": 717 }, { "clip_ratio/high_max": 0.0015712799977336545, "clip_ratio/high_mean": 0.0005862439775228268, "clip_ratio/low_mean": 0.00043463564543344546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010208796193182934, "epoch": 6.876967930029155, "grad_norm": 0.15276971459388733, "learning_rate": 1e-06, "loss": -0.0099, "step": 718 }, { "clip_ratio/high_max": 0.001603277061803965, "clip_ratio/high_mean": 0.0005762938417319674, "clip_ratio/low_mean": 0.0004266755754542828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010029694058175664, "epoch": 6.886297376093294, "grad_norm": 0.17101603746414185, "learning_rate": 1e-06, "loss": -0.0493, "step": 719 }, { "clip_ratio/high_max": 0.0015736076202301774, "clip_ratio/high_mean": 0.0005646349468406697, "clip_ratio/low_mean": 0.0005167842791706789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010814192464749794, "epoch": 6.895626822157435, "grad_norm": 0.16272105276584625, "learning_rate": 1e-06, "loss": -0.0391, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0842633928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 862.585693359375, "completions/mean_terminated_length": 565.0563354492188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 6.904956268221574, "grad_norm": 0.23223808407783508, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 423144151.0, "reward": 0.6501116156578064, "reward_std": 0.13932141661643982, "rewards/simpleverify_reward/mean": 0.6501116156578064, "rewards/simpleverify_reward/std": 0.47700104117393494, "step": 721 }, { "clip_ratio/high_max": 0.0016076039682957344, "clip_ratio/high_mean": 0.0005904886556891142, "clip_ratio/low_mean": 0.00045351036851570825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001043999018293107, "epoch": 6.914285714285715, "grad_norm": 0.2327921986579895, "learning_rate": 1e-06, "loss": -0.0033, "step": 722 }, { "clip_ratio/high_max": 0.0018033525557257235, "clip_ratio/high_mean": 0.0006281892465267447, "clip_ratio/low_mean": 0.00048224052852674504, "clip_ratio/low_min": 1.2894573956145905e-05, "clip_ratio/region_mean": 0.0011104297745987424, "epoch": 6.923615160349854, "grad_norm": 0.15336954593658447, "learning_rate": 1e-06, "loss": -0.0068, "step": 723 }, { "clip_ratio/high_max": 0.0022506157474708743, "clip_ratio/high_mean": 0.0008415904667344876, "clip_ratio/low_mean": 0.0004333978233717062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012749883098877035, "epoch": 6.932944606413994, "grad_norm": 0.1782962828874588, "learning_rate": 1e-06, "loss": -0.087, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0901227678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 891.1858520507812, "completions/mean_terminated_length": 573.7509765625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 6.942274052478134, "grad_norm": 0.19151705503463745, "learning_rate": 1e-06, "loss": -0.0389, "num_tokens": 425377905.0, "reward": 0.6300223469734192, "reward_std": 0.14259806275367737, "rewards/simpleverify_reward/mean": 0.6300223469734192, "rewards/simpleverify_reward/std": 0.48286566138267517, "step": 725 }, { "clip_ratio/high_max": 0.0018193204596173018, "clip_ratio/high_mean": 0.0006667091020062799, "clip_ratio/low_mean": 0.00046189829004106286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011286074168310734, "epoch": 6.9516034985422746, "grad_norm": 0.1804550439119339, "learning_rate": 1e-06, "loss": -0.035, "step": 726 }, { "clip_ratio/high_max": 0.0018307286263734568, "clip_ratio/high_mean": 0.0005910715226491448, "clip_ratio/low_mean": 0.000504286555951694, "clip_ratio/low_min": 1.7970098269870505e-05, "clip_ratio/region_mean": 0.0010953580858767964, "epoch": 6.960932944606414, "grad_norm": 0.1740938127040863, "learning_rate": 1e-06, "loss": -0.0199, "step": 727 }, { "clip_ratio/high_max": 0.0016502861071785446, "clip_ratio/high_mean": 0.0006726188712491421, "clip_ratio/low_mean": 0.0003899285356965265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010625473914842587, "epoch": 6.970262390670554, "grad_norm": 0.18611131608486176, "learning_rate": 1e-06, "loss": -0.0653, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3708.0, "completions/mean_length": 879.0946044921875, "completions/mean_terminated_length": 563.7117309570312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.0093294460641395, "grad_norm": 0.20551489293575287, "learning_rate": 1e-06, "loss": -0.0241, "num_tokens": 427574900.0, "reward": 0.664620578289032, "reward_std": 0.1482822746038437, "rewards/simpleverify_reward/mean": 0.6646205186843872, "rewards/simpleverify_reward/std": 0.472188800573349, "step": 729 }, { "clip_ratio/high_max": 0.0018470123723091092, "clip_ratio/high_mean": 0.0007328716701522353, "clip_ratio/low_mean": 0.0005279880642774515, "clip_ratio/low_min": 2.8761138310073875e-05, "clip_ratio/region_mean": 0.001260859737158171, "epoch": 7.01865889212828, "grad_norm": 0.1937922239303589, "learning_rate": 1e-06, "loss": -0.0388, "step": 730 }, { "clip_ratio/high_max": 0.0016552864835830405, "clip_ratio/high_mean": 0.0005913155910093337, "clip_ratio/low_mean": 0.0005204998783483461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011118154616269749, "epoch": 7.0279883381924195, "grad_norm": 0.18923521041870117, "learning_rate": 1e-06, "loss": -0.0176, "step": 731 }, { "clip_ratio/high_max": 0.0018950151570606977, "clip_ratio/high_mean": 0.0008046949205890996, "clip_ratio/low_mean": 0.0005079878110336722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013126827107043937, "epoch": 7.03731778425656, "grad_norm": 0.19305190443992615, "learning_rate": 1e-06, "loss": -0.0624, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0828683035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3922.0, "completions/mean_length": 861.4249877929688, "completions/mean_terminated_length": 569.1618041992188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.0466472303206995, "grad_norm": 0.1915503591299057, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 429809631.0, "reward": 0.6595982313156128, "reward_std": 0.1309398114681244, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.4739104211330414, "step": 733 }, { "clip_ratio/high_max": 0.0016543389938306063, "clip_ratio/high_mean": 0.000632803621556377, "clip_ratio/low_mean": 0.00045656181100639515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010893654289247934, "epoch": 7.05597667638484, "grad_norm": 0.17624014616012573, "learning_rate": 1e-06, "loss": -0.0015, "step": 734 }, { "clip_ratio/high_max": 0.0018767327092064079, "clip_ratio/high_mean": 0.0006629413010159624, "clip_ratio/low_mean": 0.00043453088437672704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010974721953971311, "epoch": 7.0653061224489795, "grad_norm": 0.17405757308006287, "learning_rate": 1e-06, "loss": -0.0286, "step": 735 }, { "clip_ratio/high_max": 0.0022075862070778385, "clip_ratio/high_mean": 0.0008376634032174479, "clip_ratio/low_mean": 0.0004374803393147886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012751437279803213, "epoch": 7.07463556851312, "grad_norm": 0.18027593195438385, "learning_rate": 1e-06, "loss": -0.0723, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0856584821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 863.0187377929688, "completions/mean_terminated_length": 560.1425170898438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.0839650145772595, "grad_norm": 0.22154584527015686, "learning_rate": 1e-06, "loss": -0.0164, "num_tokens": 432000058.0, "reward": 0.658761203289032, "reward_std": 0.13993702828884125, "rewards/simpleverify_reward/mean": 0.6587611436843872, "rewards/simpleverify_reward/std": 0.4741915464401245, "step": 737 }, { "clip_ratio/high_max": 0.0017593325719644781, "clip_ratio/high_mean": 0.0005619896192001761, "clip_ratio/low_mean": 0.000421649217742015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009836388453550171, "epoch": 7.093294460641399, "grad_norm": 0.2520385980606079, "learning_rate": 1e-06, "loss": -0.0078, "step": 738 }, { "clip_ratio/high_max": 0.001882607612060383, "clip_ratio/high_mean": 0.0007591964058519807, "clip_ratio/low_mean": 0.00040412644671050657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011633228168648202, "epoch": 7.1026239067055394, "grad_norm": 0.1931236833333969, "learning_rate": 1e-06, "loss": -0.0882, "step": 739 }, { "clip_ratio/high_max": 0.0018808894674293697, "clip_ratio/high_mean": 0.000631077943580749, "clip_ratio/low_mean": 0.0005654397909893305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011965177072852384, "epoch": 7.111953352769679, "grad_norm": 0.14806972444057465, "learning_rate": 1e-06, "loss": -0.0222, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0853794642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 882.73779296875, "completions/mean_terminated_length": 582.7809448242188, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 7.121282798833819, "grad_norm": 0.18312664330005646, "learning_rate": 1e-06, "loss": -0.0417, "num_tokens": 434275182.0, "reward": 0.6565290689468384, "reward_std": 0.13003070652484894, "rewards/simpleverify_reward/mean": 0.6565290093421936, "rewards/simpleverify_reward/std": 0.4749332368373871, "step": 741 }, { "clip_ratio/high_max": 0.0019248447970312554, "clip_ratio/high_mean": 0.000602437389716215, "clip_ratio/low_mean": 0.00039166508213384077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009941024509316776, "epoch": 7.130612244897959, "grad_norm": 0.19777043163776398, "learning_rate": 1e-06, "loss": -0.0297, "step": 742 }, { "clip_ratio/high_max": 0.0015091375935298856, "clip_ratio/high_mean": 0.0005486071977429674, "clip_ratio/low_mean": 0.0005146349731148803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010632421799527947, "epoch": 7.139941690962099, "grad_norm": 0.1767151653766632, "learning_rate": 1e-06, "loss": -0.0293, "step": 743 }, { "clip_ratio/high_max": 0.0015703627868788317, "clip_ratio/high_mean": 0.0005670976352121215, "clip_ratio/low_mean": 0.000524028050222114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001091125697712414, "epoch": 7.149271137026239, "grad_norm": 0.18011105060577393, "learning_rate": 1e-06, "loss": -0.0251, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0856584821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 855.4302978515625, "completions/mean_terminated_length": 551.8431396484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 7.158600583090379, "grad_norm": 0.20788873732089996, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 436434028.0, "reward": 0.664620578289032, "reward_std": 0.14261654019355774, "rewards/simpleverify_reward/mean": 0.6646205186843872, "rewards/simpleverify_reward/std": 0.472188800573349, "step": 745 }, { "clip_ratio/high_max": 0.0017163538905151654, "clip_ratio/high_mean": 0.000700855749528273, "clip_ratio/low_mean": 0.00044292766324360855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011437834182288498, "epoch": 7.167930029154519, "grad_norm": 0.18138691782951355, "learning_rate": 1e-06, "loss": -0.0621, "step": 746 }, { "clip_ratio/high_max": 0.0018914211032097228, "clip_ratio/high_mean": 0.0007136706080927979, "clip_ratio/low_mean": 0.0003745596598037082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010882302776735742, "epoch": 7.1772594752186585, "grad_norm": 0.1779525876045227, "learning_rate": 1e-06, "loss": -0.0523, "step": 747 }, { "clip_ratio/high_max": 0.0018799522804329172, "clip_ratio/high_mean": 0.000712784836650826, "clip_ratio/low_mean": 0.0004850453615290462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001197830180899473, "epoch": 7.186588921282799, "grad_norm": 0.25139161944389343, "learning_rate": 1e-06, "loss": -0.0199, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 905.0929565429688, "completions/mean_terminated_length": 566.3052978515625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 7.1959183673469385, "grad_norm": 0.19935838878154755, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 438635081.0, "reward": 0.634765625, "reward_std": 0.1269468367099762, "rewards/simpleverify_reward/mean": 0.634765625, "rewards/simpleverify_reward/std": 0.481563001871109, "step": 749 }, { "clip_ratio/high_max": 0.0019427779261604883, "clip_ratio/high_mean": 0.0006427679309126688, "clip_ratio/low_mean": 0.000361142155725247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010039100925496314, "epoch": 7.205247813411079, "grad_norm": 0.165157750248909, "learning_rate": 1e-06, "loss": -0.03, "step": 750 }, { "clip_ratio/high_max": 0.001742748874676181, "clip_ratio/high_mean": 0.0006062312768335687, "clip_ratio/low_mean": 0.00045975161037858925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010659828803909477, "epoch": 7.214577259475218, "grad_norm": 0.20024339854717255, "learning_rate": 1e-06, "loss": -0.0113, "step": 751 }, { "clip_ratio/high_max": 0.0014818614108662587, "clip_ratio/high_mean": 0.000552613552827097, "clip_ratio/low_mean": 0.0003992932606706745, "clip_ratio/low_min": 1.270841767109232e-05, "clip_ratio/region_mean": 0.0009519068116787821, "epoch": 7.223906705539359, "grad_norm": 0.15922866761684418, "learning_rate": 1e-06, "loss": -0.05, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0851004464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 872.064208984375, "completions/mean_terminated_length": 572.18603515625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.233236151603498, "grad_norm": 0.21675385534763336, "learning_rate": 1e-06, "loss": -0.0224, "num_tokens": 440862631.0, "reward": 0.6749442219734192, "reward_std": 0.14189545810222626, "rewards/simpleverify_reward/mean": 0.6749442219734192, "rewards/simpleverify_reward/std": 0.4684610664844513, "step": 753 }, { "clip_ratio/high_max": 0.0018079894362017512, "clip_ratio/high_mean": 0.000772438394051278, "clip_ratio/low_mean": 0.00034654170030989917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011189800934516825, "epoch": 7.242565597667639, "grad_norm": 0.2562139928340912, "learning_rate": 1e-06, "loss": -0.076, "step": 754 }, { "clip_ratio/high_max": 0.0017749469334376045, "clip_ratio/high_mean": 0.0006789094431951526, "clip_ratio/low_mean": 0.00037290440195647534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010518138278712286, "epoch": 7.251895043731778, "grad_norm": 0.19067296385765076, "learning_rate": 1e-06, "loss": -0.0394, "step": 755 }, { "clip_ratio/high_max": 0.0016348356366506778, "clip_ratio/high_mean": 0.0006943949492779211, "clip_ratio/low_mean": 0.0005323717500687053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012267667116248049, "epoch": 7.261224489795918, "grad_norm": 0.20357002317905426, "learning_rate": 1e-06, "loss": 0.0016, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0909598214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 905.767333984375, "completions/mean_terminated_length": 586.5481567382812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.270553935860058, "grad_norm": 0.21813829243183136, "learning_rate": 1e-06, "loss": -0.0676, "num_tokens": 443122141.0, "reward": 0.6484375, "reward_std": 0.15404973924160004, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4775247871875763, "step": 757 }, { "clip_ratio/high_max": 0.0020512463452178054, "clip_ratio/high_mean": 0.0006872124467918184, "clip_ratio/low_mean": 0.00037554574919340666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001062758223270066, "epoch": 7.279883381924198, "grad_norm": 0.19762733578681946, "learning_rate": 1e-06, "loss": -0.0572, "step": 758 }, { "clip_ratio/high_max": 0.001494745840318501, "clip_ratio/high_mean": 0.0005980004352750257, "clip_ratio/low_mean": 0.0005000911787647055, "clip_ratio/low_min": 2.8299505174800288e-05, "clip_ratio/region_mean": 0.0010980916122207418, "epoch": 7.289212827988338, "grad_norm": 0.18981638550758362, "learning_rate": 1e-06, "loss": -0.0199, "step": 759 }, { "clip_ratio/high_max": 0.0019126760962535627, "clip_ratio/high_mean": 0.0007684841966693057, "clip_ratio/low_mean": 0.000417728534557682, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011862127248605248, "epoch": 7.298542274052478, "grad_norm": 0.16325582563877106, "learning_rate": 1e-06, "loss": -0.0392, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.083984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 858.9166259765625, "completions/mean_terminated_length": 562.1264038085938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.307871720116618, "grad_norm": 0.1727965772151947, "learning_rate": 1e-06, "loss": -0.0555, "num_tokens": 445317938.0, "reward": 0.6710379719734192, "reward_std": 0.13377831876277924, "rewards/simpleverify_reward/mean": 0.6710379719734192, "rewards/simpleverify_reward/std": 0.4699017107486725, "step": 761 }, { "clip_ratio/high_max": 0.0015749776612210553, "clip_ratio/high_mean": 0.0006320705178950448, "clip_ratio/low_mean": 0.00034099913955287775, "clip_ratio/low_min": 1.181027982966043e-05, "clip_ratio/region_mean": 0.0009730696892802371, "epoch": 7.317201166180758, "grad_norm": 0.17832113802433014, "learning_rate": 1e-06, "loss": -0.0388, "step": 762 }, { "clip_ratio/high_max": 0.0017968898955587065, "clip_ratio/high_mean": 0.0006033982590452069, "clip_ratio/low_mean": 0.00048138486863535945, "clip_ratio/low_min": 1.486325800215127e-05, "clip_ratio/region_mean": 0.001084783158148639, "epoch": 7.326530612244898, "grad_norm": 0.2007288783788681, "learning_rate": 1e-06, "loss": -0.0198, "step": 763 }, { "clip_ratio/high_max": 0.0014369994005392073, "clip_ratio/high_mean": 0.0005598374193596101, "clip_ratio/low_mean": 0.0005031501741541433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010629876051098108, "epoch": 7.335860058309038, "grad_norm": 0.17311786115169525, "learning_rate": 1e-06, "loss": -0.0137, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0867745535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 881.3370971679688, "completions/mean_terminated_length": 575.8802490234375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 7.345189504373177, "grad_norm": 0.1987500637769699, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 447554442.0, "reward": 0.65625, "reward_std": 0.14067181944847107, "rewards/simpleverify_reward/mean": 0.65625, "rewards/simpleverify_reward/std": 0.47502514719963074, "step": 765 }, { "clip_ratio/high_max": 0.001671699559665285, "clip_ratio/high_mean": 0.00072492514118494, "clip_ratio/low_mean": 0.0003542542876857624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010791794229589868, "epoch": 7.354518950437318, "grad_norm": 0.18618327379226685, "learning_rate": 1e-06, "loss": -0.0515, "step": 766 }, { "clip_ratio/high_max": 0.0015737713292764965, "clip_ratio/high_mean": 0.0005955940669082338, "clip_ratio/low_mean": 0.0003914600147254532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000987054063443793, "epoch": 7.363848396501457, "grad_norm": 0.3291701376438141, "learning_rate": 1e-06, "loss": -0.0365, "step": 767 }, { "clip_ratio/high_max": 0.0016665909606672358, "clip_ratio/high_mean": 0.0006562590597241069, "clip_ratio/low_mean": 0.00026593935331220564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009221984273608541, "epoch": 7.373177842565598, "grad_norm": 0.2445688098669052, "learning_rate": 1e-06, "loss": -0.0597, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0719866071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3924.0, "completions/mean_length": 784.6381225585938, "completions/mean_terminated_length": 527.7736206054688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 7.382507288629737, "grad_norm": 0.2317119538784027, "learning_rate": 1e-06, "loss": -0.0524, "num_tokens": 449677113.0, "reward": 0.6824777126312256, "reward_std": 0.14188669621944427, "rewards/simpleverify_reward/mean": 0.6824776530265808, "rewards/simpleverify_reward/std": 0.46557745337486267, "step": 769 }, { "clip_ratio/high_max": 0.0018088981014443561, "clip_ratio/high_mean": 0.0006410385112758377, "clip_ratio/low_mean": 0.00037022951619292144, "clip_ratio/low_min": 1.6318537745974027e-05, "clip_ratio/region_mean": 0.0010112680356542114, "epoch": 7.391836734693878, "grad_norm": 0.2193356454372406, "learning_rate": 1e-06, "loss": -0.0283, "step": 770 }, { "clip_ratio/high_max": 0.0019619596387201454, "clip_ratio/high_mean": 0.0007403225117741385, "clip_ratio/low_mean": 0.00041822677212621784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001158549275714904, "epoch": 7.401166180758017, "grad_norm": 0.2307719886302948, "learning_rate": 1e-06, "loss": -0.0268, "step": 771 }, { "clip_ratio/high_max": 0.0018652260368980933, "clip_ratio/high_mean": 0.0007488153223675909, "clip_ratio/low_mean": 0.00041018096135303495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011589963069127407, "epoch": 7.410495626822158, "grad_norm": 0.1763489842414856, "learning_rate": 1e-06, "loss": -0.0157, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 906.378662109375, "completions/mean_terminated_length": 580.7468872070312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.419825072886297, "grad_norm": 0.19754496216773987, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 451918374.0, "reward": 0.645089328289032, "reward_std": 0.1587468385696411, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.47855302691459656, "step": 773 }, { "clip_ratio/high_max": 0.0018791328184306622, "clip_ratio/high_mean": 0.0006938591723155696, "clip_ratio/low_mean": 0.0004620438030542573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011559029917407315, "epoch": 7.429154518950437, "grad_norm": 0.18765835464000702, "learning_rate": 1e-06, "loss": -0.045, "step": 774 }, { "clip_ratio/high_max": 0.001910295119159855, "clip_ratio/high_mean": 0.0007346178854277241, "clip_ratio/low_mean": 0.0003823485481007083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011169664430781268, "epoch": 7.438483965014577, "grad_norm": 0.19485649466514587, "learning_rate": 1e-06, "loss": -0.0657, "step": 775 }, { "clip_ratio/high_max": 0.0020655393236665986, "clip_ratio/high_mean": 0.0008263982363132527, "clip_ratio/low_mean": 0.00041885759583237814, "clip_ratio/low_min": 1.0632867997628637e-05, "clip_ratio/region_mean": 0.0012452558476070408, "epoch": 7.447813411078717, "grad_norm": 0.19892308115959167, "learning_rate": 1e-06, "loss": -0.0695, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0984933035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 927.9913940429688, "completions/mean_terminated_length": 581.8734130859375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.457142857142857, "grad_norm": 0.21647728979587555, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 454160263.0, "reward": 0.6662946939468384, "reward_std": 0.13231097161769867, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.4716016948223114, "step": 777 }, { "clip_ratio/high_max": 0.001741565669362899, "clip_ratio/high_mean": 0.0005292889977681625, "clip_ratio/low_mean": 0.00032641477537254104, "clip_ratio/low_min": 2.1716470655519515e-05, "clip_ratio/region_mean": 0.0008557037726859562, "epoch": 7.466472303206997, "grad_norm": 0.17192858457565308, "learning_rate": 1e-06, "loss": -0.01, "step": 778 }, { "clip_ratio/high_max": 0.0021642340725520626, "clip_ratio/high_mean": 0.0007178464311436983, "clip_ratio/low_mean": 0.0004303601167521265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011482065674499609, "epoch": 7.475801749271137, "grad_norm": 0.18827177584171295, "learning_rate": 1e-06, "loss": -0.0561, "step": 779 }, { "clip_ratio/high_max": 0.001929523808939848, "clip_ratio/high_mean": 0.0007165902279666625, "clip_ratio/low_mean": 0.00040998974873218685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001126579947595019, "epoch": 7.485131195335277, "grad_norm": 0.22880253195762634, "learning_rate": 1e-06, "loss": -0.03, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0968191964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 908.238037109375, "completions/mean_terminated_length": 566.5162353515625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 7.494460641399417, "grad_norm": 0.23147010803222656, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 456350620.0, "reward": 0.6534598469734192, "reward_std": 0.1586141288280487, "rewards/simpleverify_reward/mean": 0.6534598469734192, "rewards/simpleverify_reward/std": 0.47593408823013306, "step": 781 }, { "clip_ratio/high_max": 0.0022024245190550573, "clip_ratio/high_mean": 0.0008616836239525583, "clip_ratio/low_mean": 0.00042434754732312285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001286031194467796, "epoch": 7.503790087463557, "grad_norm": 0.20952796936035156, "learning_rate": 1e-06, "loss": -0.0623, "step": 782 }, { "clip_ratio/high_max": 0.0020926506331306882, "clip_ratio/high_mean": 0.0008953853757702745, "clip_ratio/low_mean": 0.0004955668182446971, "clip_ratio/low_min": 1.6979081919998862e-05, "clip_ratio/region_mean": 0.0013909521840105299, "epoch": 7.513119533527696, "grad_norm": 0.2690824866294861, "learning_rate": 1e-06, "loss": -0.0493, "step": 783 }, { "clip_ratio/high_max": 0.0022289208573056385, "clip_ratio/high_mean": 0.0008587889424234163, "clip_ratio/low_mean": 0.0004762093235513021, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013349982727959286, "epoch": 7.522448979591837, "grad_norm": 0.17223253846168518, "learning_rate": 1e-06, "loss": -0.0248, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0984933035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 939.1682739257812, "completions/mean_terminated_length": 594.271484375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.531778425655976, "grad_norm": 0.19387538731098175, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 458604855.0, "reward": 0.6478794813156128, "reward_std": 0.1380448341369629, "rewards/simpleverify_reward/mean": 0.6478794813156128, "rewards/simpleverify_reward/std": 0.4776979386806488, "step": 785 }, { "clip_ratio/high_max": 0.002034639266639715, "clip_ratio/high_mean": 0.0007501083764509531, "clip_ratio/low_mean": 0.0004238167675794102, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173925178591162, "epoch": 7.541107871720117, "grad_norm": 0.21685782074928284, "learning_rate": 1e-06, "loss": -0.0362, "step": 786 }, { "clip_ratio/high_max": 0.0017802858928916976, "clip_ratio/high_mean": 0.0006753787502020714, "clip_ratio/low_mean": 0.00044790494757762644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011232836841372773, "epoch": 7.550437317784256, "grad_norm": 0.2130149006843567, "learning_rate": 1e-06, "loss": -0.0625, "step": 787 }, { "clip_ratio/high_max": 0.0016349162942788098, "clip_ratio/high_mean": 0.000646815631625941, "clip_ratio/low_mean": 0.0005097502325952519, "clip_ratio/low_min": 3.8065281842136756e-05, "clip_ratio/region_mean": 0.001156565864221193, "epoch": 7.559766763848397, "grad_norm": 0.20126572251319885, "learning_rate": 1e-06, "loss": -0.0361, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1029575892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 936.5918579101562, "completions/mean_terminated_length": 573.9722900390625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 7.569096209912536, "grad_norm": 0.19216550886631012, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 460818200.0, "reward": 0.638671875, "reward_std": 0.14511430263519287, "rewards/simpleverify_reward/mean": 0.638671875, "rewards/simpleverify_reward/std": 0.48045241832733154, "step": 789 }, { "clip_ratio/high_max": 0.0019140923868690152, "clip_ratio/high_mean": 0.000695674186317774, "clip_ratio/low_mean": 0.00032720606873226643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010228802348137833, "epoch": 7.578425655976677, "grad_norm": 0.1866471767425537, "learning_rate": 1e-06, "loss": -0.0716, "step": 790 }, { "clip_ratio/high_max": 0.002002627199544804, "clip_ratio/high_mean": 0.0007636253931195824, "clip_ratio/low_mean": 0.00042201636472327664, "clip_ratio/low_min": 2.100134406646248e-05, "clip_ratio/region_mean": 0.0011856417568196775, "epoch": 7.587755102040816, "grad_norm": 0.18559928238391876, "learning_rate": 1e-06, "loss": -0.0425, "step": 791 }, { "clip_ratio/high_max": 0.0019046715497097466, "clip_ratio/high_mean": 0.0007139939325497835, "clip_ratio/low_mean": 0.0004890396153314214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001203033527417574, "epoch": 7.597084548104956, "grad_norm": 0.19073089957237244, "learning_rate": 1e-06, "loss": -0.0253, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3459.0, "completions/mean_length": 875.3117065429688, "completions/mean_terminated_length": 550.8688354492188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.606413994169096, "grad_norm": 0.19959139823913574, "learning_rate": 1e-06, "loss": 0.0219, "num_tokens": 462958325.0, "reward": 0.654854953289032, "reward_std": 0.14005178213119507, "rewards/simpleverify_reward/mean": 0.6548548936843872, "rewards/simpleverify_reward/std": 0.47548189759254456, "step": 793 }, { "clip_ratio/high_max": 0.0018686321927816607, "clip_ratio/high_mean": 0.000683339328134025, "clip_ratio/low_mean": 0.0003787257576277625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010620650609780569, "epoch": 7.615743440233236, "grad_norm": 0.2395373284816742, "learning_rate": 1e-06, "loss": -0.0464, "step": 794 }, { "clip_ratio/high_max": 0.0018341139439144172, "clip_ratio/high_mean": 0.0007254053925862536, "clip_ratio/low_mean": 0.0004579947244565119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011834001197712496, "epoch": 7.625072886297376, "grad_norm": 0.28046396374702454, "learning_rate": 1e-06, "loss": -0.023, "step": 795 }, { "clip_ratio/high_max": 0.002113022157573141, "clip_ratio/high_mean": 0.0008519747279933654, "clip_ratio/low_mean": 0.00041628469421084446, "clip_ratio/low_min": 1.3414895875030197e-05, "clip_ratio/region_mean": 0.0012682594206125941, "epoch": 7.634402332361516, "grad_norm": 0.21242403984069824, "learning_rate": 1e-06, "loss": -0.0501, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 896.218505859375, "completions/mean_terminated_length": 573.8817749023438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.643731778425656, "grad_norm": 0.20062634348869324, "learning_rate": 1e-06, "loss": -0.0401, "num_tokens": 465177524.0, "reward": 0.6676897406578064, "reward_std": 0.1466578096151352, "rewards/simpleverify_reward/mean": 0.6676897406578064, "rewards/simpleverify_reward/std": 0.4711073040962219, "step": 797 }, { "clip_ratio/high_max": 0.0018654073428479023, "clip_ratio/high_mean": 0.000809405704785604, "clip_ratio/low_mean": 0.00027496119400893804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010843669115274679, "epoch": 7.653061224489796, "grad_norm": 0.2315228134393692, "learning_rate": 1e-06, "loss": -0.0859, "step": 798 }, { "clip_ratio/high_max": 0.001962542621186003, "clip_ratio/high_mean": 0.0007756761278869817, "clip_ratio/low_mean": 0.0003966537096857792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001172329824839835, "epoch": 7.662390670553936, "grad_norm": 0.23106253147125244, "learning_rate": 1e-06, "loss": -0.0457, "step": 799 }, { "clip_ratio/high_max": 0.0018530690322222654, "clip_ratio/high_mean": 0.0007786541173118167, "clip_ratio/low_mean": 0.0004980147641617805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012766689142154064, "epoch": 7.671720116618076, "grad_norm": 0.1881476640701294, "learning_rate": 1e-06, "loss": -0.0015, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1023995535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 920.8856201171875, "completions/mean_terminated_length": 558.6639404296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.681049562682215, "grad_norm": 0.259267121553421, "learning_rate": 1e-06, "loss": -0.078, "num_tokens": 467339146.0, "reward": 0.6280692219734192, "reward_std": 0.15175700187683105, "rewards/simpleverify_reward/mean": 0.6280692219734192, "rewards/simpleverify_reward/std": 0.4833875000476837, "step": 801 }, { "clip_ratio/high_max": 0.0019524235249264166, "clip_ratio/high_mean": 0.000762080293498002, "clip_ratio/low_mean": 0.0005769195140601369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013389998202910647, "epoch": 7.690379008746356, "grad_norm": 0.24645550549030304, "learning_rate": 1e-06, "loss": -0.0157, "step": 802 }, { "clip_ratio/high_max": 0.0016253850189968944, "clip_ratio/high_mean": 0.0006191337079144432, "clip_ratio/low_mean": 0.0006756656257493887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012947993491252419, "epoch": 7.699708454810495, "grad_norm": 0.20638370513916016, "learning_rate": 1e-06, "loss": -0.0198, "step": 803 }, { "clip_ratio/high_max": 0.0019093915398116224, "clip_ratio/high_mean": 0.0007213024700831738, "clip_ratio/low_mean": 0.0004893839286523871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012106863905501086, "epoch": 7.709037900874636, "grad_norm": 0.22119882702827454, "learning_rate": 1e-06, "loss": -0.0466, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0943080357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 893.6978759765625, "completions/mean_terminated_length": 560.2479858398438, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 7.718367346938775, "grad_norm": 0.2376718968153, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 469530311.0, "reward": 0.650948703289032, "reward_std": 0.14483168721199036, "rewards/simpleverify_reward/mean": 0.6509486436843872, "rewards/simpleverify_reward/std": 0.4767366945743561, "step": 805 }, { "clip_ratio/high_max": 0.0015168275058385916, "clip_ratio/high_mean": 0.000587136348713102, "clip_ratio/low_mean": 0.00041966989124375687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010068062474601902, "epoch": 7.727696793002916, "grad_norm": 0.18342451751232147, "learning_rate": 1e-06, "loss": -0.0584, "step": 806 }, { "clip_ratio/high_max": 0.0015585196088068187, "clip_ratio/high_mean": 0.0005844056358910166, "clip_ratio/low_mean": 0.0005695861364074517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001153991775936447, "epoch": 7.737026239067055, "grad_norm": 0.600071132183075, "learning_rate": 1e-06, "loss": -0.0092, "step": 807 }, { "clip_ratio/high_max": 0.0024641034251544625, "clip_ratio/high_mean": 0.0008495569472870557, "clip_ratio/low_mean": 0.0005054585317338933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013550154690165073, "epoch": 7.746355685131196, "grad_norm": 0.22856880724430084, "learning_rate": 1e-06, "loss": -0.0538, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1085379464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 950.294677734375, "completions/mean_terminated_length": 567.29638671875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 7.755685131195335, "grad_norm": 0.21829251945018768, "learning_rate": 1e-06, "loss": -0.0842, "num_tokens": 471692111.0, "reward": 0.6342076063156128, "reward_std": 0.1302090287208557, "rewards/simpleverify_reward/mean": 0.6342076063156128, "rewards/simpleverify_reward/std": 0.48171886801719666, "step": 809 }, { "clip_ratio/high_max": 0.001972185649719904, "clip_ratio/high_mean": 0.0006722137432007003, "clip_ratio/low_mean": 0.0004469868272281019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011192005640623393, "epoch": 7.765014577259475, "grad_norm": 0.2608254849910736, "learning_rate": 1e-06, "loss": -0.0489, "step": 810 }, { "clip_ratio/high_max": 0.0017194374522659928, "clip_ratio/high_mean": 0.0006161363726278068, "clip_ratio/low_mean": 0.0004938779566145968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011100143783551175, "epoch": 7.774344023323615, "grad_norm": 0.17729410529136658, "learning_rate": 1e-06, "loss": -0.0208, "step": 811 }, { "clip_ratio/high_max": 0.001419206484570168, "clip_ratio/high_mean": 0.0005290525832606363, "clip_ratio/low_mean": 0.00044108942620368907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009701420094643254, "epoch": 7.783673469387755, "grad_norm": 0.1725291758775711, "learning_rate": 1e-06, "loss": 0.0172, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0856584821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 866.4953002929688, "completions/mean_terminated_length": 563.9447631835938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.793002915451895, "grad_norm": 0.21722500026226044, "learning_rate": 1e-06, "loss": -0.029, "num_tokens": 473916470.0, "reward": 0.6590402126312256, "reward_std": 0.13093052804470062, "rewards/simpleverify_reward/mean": 0.6590401530265808, "rewards/simpleverify_reward/std": 0.47409799695014954, "step": 813 }, { "clip_ratio/high_max": 0.0018150356299884152, "clip_ratio/high_mean": 0.0005601820007541392, "clip_ratio/low_mean": 0.0005106276676087873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010708096488087904, "epoch": 7.802332361516035, "grad_norm": 0.2178204506635666, "learning_rate": 1e-06, "loss": -0.0079, "step": 814 }, { "clip_ratio/high_max": 0.0019058194411627483, "clip_ratio/high_mean": 0.0006897407056385418, "clip_ratio/low_mean": 0.00044222580163477687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011319665027258452, "epoch": 7.811661807580175, "grad_norm": 0.19166560471057892, "learning_rate": 1e-06, "loss": -0.0354, "step": 815 }, { "clip_ratio/high_max": 0.001951302900124574, "clip_ratio/high_mean": 0.0007007012573012616, "clip_ratio/low_mean": 0.00043950164945272263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011402029449527618, "epoch": 7.820991253644315, "grad_norm": 0.20802700519561768, "learning_rate": 1e-06, "loss": -0.0335, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0753348214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3372.0, "completions/mean_length": 808.3117065429688, "completions/mean_terminated_length": 540.455322265625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 7.830320699708455, "grad_norm": 0.18682081997394562, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 476072243.0, "reward": 0.674386203289032, "reward_std": 0.13040666282176971, "rewards/simpleverify_reward/mean": 0.6743861436843872, "rewards/simpleverify_reward/std": 0.4686691164970398, "step": 817 }, { "clip_ratio/high_max": 0.0016263100987998769, "clip_ratio/high_mean": 0.0006963704145164229, "clip_ratio/low_mean": 0.00042667611660363036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011230465352127794, "epoch": 7.839650145772595, "grad_norm": 0.20069776475429535, "learning_rate": 1e-06, "loss": -0.0339, "step": 818 }, { "clip_ratio/high_max": 0.0019915581397071946, "clip_ratio/high_mean": 0.0006923116652615136, "clip_ratio/low_mean": 0.00040920385072240606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011015155141649302, "epoch": 7.848979591836734, "grad_norm": 0.23941509425640106, "learning_rate": 1e-06, "loss": -0.0177, "step": 819 }, { "clip_ratio/high_max": 0.001928217832755763, "clip_ratio/high_mean": 0.0008059317242441466, "clip_ratio/low_mean": 0.0004192757864984742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012252075066498946, "epoch": 7.858309037900875, "grad_norm": 0.1941695213317871, "learning_rate": 1e-06, "loss": -0.0376, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0789620535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 818.1467895507812, "completions/mean_terminated_length": 537.1311645507812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.867638483965014, "grad_norm": 0.2326592057943344, "learning_rate": 1e-06, "loss": -0.03, "num_tokens": 478199553.0, "reward": 0.6515067219734192, "reward_std": 0.14030587673187256, "rewards/simpleverify_reward/mean": 0.6515067219734192, "rewards/simpleverify_reward/std": 0.4765596091747284, "step": 821 }, { "clip_ratio/high_max": 0.001701283825241262, "clip_ratio/high_mean": 0.0006854474122519605, "clip_ratio/low_mean": 0.0005257875423012592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012112349286326207, "epoch": 7.876967930029155, "grad_norm": 0.17900310456752777, "learning_rate": 1e-06, "loss": -0.0236, "step": 822 }, { "clip_ratio/high_max": 0.001668118522502482, "clip_ratio/high_mean": 0.0006416020751203177, "clip_ratio/low_mean": 0.0005418368709797505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011834389188152272, "epoch": 7.886297376093294, "grad_norm": 0.2985275983810425, "learning_rate": 1e-06, "loss": -0.0492, "step": 823 }, { "clip_ratio/high_max": 0.0018781254839268513, "clip_ratio/high_mean": 0.0007526711742684711, "clip_ratio/low_mean": 0.0005467262108140858, "clip_ratio/low_min": 1.382743357680738e-05, "clip_ratio/region_mean": 0.001299397372349631, "epoch": 7.895626822157435, "grad_norm": 0.1880408227443695, "learning_rate": 1e-06, "loss": -0.0408, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0867745535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 869.3401489257812, "completions/mean_terminated_length": 562.7433471679688, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.904956268221574, "grad_norm": 0.2580636441707611, "learning_rate": 1e-06, "loss": -0.0837, "num_tokens": 480421668.0, "reward": 0.6476004719734192, "reward_std": 0.14782081544399261, "rewards/simpleverify_reward/mean": 0.6476004719734192, "rewards/simpleverify_reward/std": 0.47778424620628357, "step": 825 }, { "clip_ratio/high_max": 0.002644984848302556, "clip_ratio/high_mean": 0.0008558299214200815, "clip_ratio/low_mean": 0.00044309859458735446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012989285241928883, "epoch": 7.914285714285715, "grad_norm": 0.19940990209579468, "learning_rate": 1e-06, "loss": -0.0259, "step": 826 }, { "clip_ratio/high_max": 0.0017099909273383673, "clip_ratio/high_mean": 0.00061033815927658, "clip_ratio/low_mean": 0.0005173661384105799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011277043086010963, "epoch": 7.923615160349854, "grad_norm": 0.17693646252155304, "learning_rate": 1e-06, "loss": -0.001, "step": 827 }, { "clip_ratio/high_max": 0.002103300728776958, "clip_ratio/high_mean": 0.0007399925179925049, "clip_ratio/low_mean": 0.00043297802039887756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011729705656762235, "epoch": 7.932944606413994, "grad_norm": 0.19478558003902435, "learning_rate": 1e-06, "loss": -0.0221, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 967.54248046875, "completions/mean_terminated_length": 567.8716430664062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 7.942274052478134, "grad_norm": 0.2276674211025238, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 482576980.0, "reward": 0.6434152126312256, "reward_std": 0.1509077548980713, "rewards/simpleverify_reward/mean": 0.6434151530265808, "rewards/simpleverify_reward/std": 0.47905752062797546, "step": 829 }, { "clip_ratio/high_max": 0.0020530131841951516, "clip_ratio/high_mean": 0.0006902190852997592, "clip_ratio/low_mean": 0.0005048754392191768, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011950945226999465, "epoch": 7.9516034985422746, "grad_norm": 0.19406114518642426, "learning_rate": 1e-06, "loss": -0.0398, "step": 830 }, { "clip_ratio/high_max": 0.00183062495852937, "clip_ratio/high_mean": 0.0006364162563841091, "clip_ratio/low_mean": 0.00045057566876494093, "clip_ratio/low_min": 1.4592575098504312e-05, "clip_ratio/region_mean": 0.0010869919115066295, "epoch": 7.960932944606414, "grad_norm": 0.23827877640724182, "learning_rate": 1e-06, "loss": -0.0492, "step": 831 }, { "clip_ratio/high_max": 0.0020628995225706603, "clip_ratio/high_mean": 0.0007603598369314568, "clip_ratio/low_mean": 0.0004754650535687688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012358248968666885, "epoch": 7.970262390670554, "grad_norm": 0.2038354128599167, "learning_rate": 1e-06, "loss": -0.0626, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0890066964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 896.7902221679688, "completions/mean_terminated_length": 584.218017578125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.00932944606414, "grad_norm": 0.19130678474903107, "learning_rate": 1e-06, "loss": -0.0381, "num_tokens": 484839884.0, "reward": 0.6643415689468384, "reward_std": 0.1401463747024536, "rewards/simpleverify_reward/mean": 0.6643415093421936, "rewards/simpleverify_reward/std": 0.47228604555130005, "step": 833 }, { "clip_ratio/high_max": 0.0022396451568056364, "clip_ratio/high_mean": 0.0007475803322449792, "clip_ratio/low_mean": 0.00042583134199958295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011734117069863714, "epoch": 8.018658892128279, "grad_norm": 0.2026178240776062, "learning_rate": 1e-06, "loss": -0.0242, "step": 834 }, { "clip_ratio/high_max": 0.0018602855670906138, "clip_ratio/high_mean": 0.0006705811065330636, "clip_ratio/low_mean": 0.0003141636318559904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009847447599895531, "epoch": 8.02798833819242, "grad_norm": 0.20869730412960052, "learning_rate": 1e-06, "loss": -0.0497, "step": 835 }, { "clip_ratio/high_max": 0.0017626586704864167, "clip_ratio/high_mean": 0.0006909156327310484, "clip_ratio/low_mean": 0.0005240490918367868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012149647445767187, "epoch": 8.03731778425656, "grad_norm": 0.2021002471446991, "learning_rate": 1e-06, "loss": -0.019, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0909598214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3375.0, "completions/mean_length": 879.3167114257812, "completions/mean_terminated_length": 557.4508666992188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 8.0466472303207, "grad_norm": 0.1803373247385025, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 487012763.0, "reward": 0.6713169813156128, "reward_std": 0.1273239552974701, "rewards/simpleverify_reward/mean": 0.6713169813156128, "rewards/simpleverify_reward/std": 0.46980005502700806, "step": 837 }, { "clip_ratio/high_max": 0.0014776573843846563, "clip_ratio/high_mean": 0.0005845458163094008, "clip_ratio/low_mean": 0.00039471032869187184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009792561431822833, "epoch": 8.055976676384839, "grad_norm": 0.25093957781791687, "learning_rate": 1e-06, "loss": -0.0468, "step": 838 }, { "clip_ratio/high_max": 0.0019740602183446754, "clip_ratio/high_mean": 0.0006339872525131796, "clip_ratio/low_mean": 0.00037484790072994656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001008835142783937, "epoch": 8.06530612244898, "grad_norm": 0.2048017829656601, "learning_rate": 1e-06, "loss": -0.038, "step": 839 }, { "clip_ratio/high_max": 0.0015635843483323697, "clip_ratio/high_mean": 0.0005752003089583013, "clip_ratio/low_mean": 0.0005095755011552683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010847758130694274, "epoch": 8.07463556851312, "grad_norm": 0.18902996182441711, "learning_rate": 1e-06, "loss": -0.0101, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0912388392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 884.152099609375, "completions/mean_terminated_length": 561.685302734375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 8.08396501457726, "grad_norm": 0.22261740267276764, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 489203740.0, "reward": 0.6551339626312256, "reward_std": 0.15133264660835266, "rewards/simpleverify_reward/mean": 0.6551339030265808, "rewards/simpleverify_reward/std": 0.4753909111022949, "step": 841 }, { "clip_ratio/high_max": 0.001809556135413004, "clip_ratio/high_mean": 0.0007066893522278406, "clip_ratio/low_mean": 0.00031641231407775194, "clip_ratio/low_min": 1.2329847777436953e-05, "clip_ratio/region_mean": 0.0010231016676698346, "epoch": 8.093294460641399, "grad_norm": 0.2065172791481018, "learning_rate": 1e-06, "loss": -0.0627, "step": 842 }, { "clip_ratio/high_max": 0.0018721296146395616, "clip_ratio/high_mean": 0.0006802034531574463, "clip_ratio/low_mean": 0.00043648222572301165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001116685689339647, "epoch": 8.102623906705539, "grad_norm": 0.19987669587135315, "learning_rate": 1e-06, "loss": -0.0247, "step": 843 }, { "clip_ratio/high_max": 0.001860138654592447, "clip_ratio/high_mean": 0.0006853596396467765, "clip_ratio/low_mean": 0.0005604637735814322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001245823375938926, "epoch": 8.11195335276968, "grad_norm": 0.25699561834335327, "learning_rate": 1e-06, "loss": -0.0276, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 882.4629516601562, "completions/mean_terminated_length": 565.2467651367188, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 8.12128279883382, "grad_norm": 0.2067418247461319, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 491406831.0, "reward": 0.66015625, "reward_std": 0.13159838318824768, "rewards/simpleverify_reward/mean": 0.66015625, "rewards/simpleverify_reward/std": 0.47372207045555115, "step": 845 }, { "clip_ratio/high_max": 0.001633677373320097, "clip_ratio/high_mean": 0.0006376029000421113, "clip_ratio/low_mean": 0.0003243860403472354, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009619889278837945, "epoch": 8.130612244897959, "grad_norm": 0.18661540746688843, "learning_rate": 1e-06, "loss": -0.0424, "step": 846 }, { "clip_ratio/high_max": 0.001573355697473744, "clip_ratio/high_mean": 0.0005880499074919499, "clip_ratio/low_mean": 0.00032942443976935465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009174743354378734, "epoch": 8.139941690962099, "grad_norm": 0.19220468401908875, "learning_rate": 1e-06, "loss": -0.0298, "step": 847 }, { "clip_ratio/high_max": 0.001785242508049123, "clip_ratio/high_mean": 0.000698519386787666, "clip_ratio/low_mean": 0.0003481051203380048, "clip_ratio/low_min": 1.653001891099848e-05, "clip_ratio/region_mean": 0.001046624493028503, "epoch": 8.14927113702624, "grad_norm": 0.16994823515415192, "learning_rate": 1e-06, "loss": -0.0747, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0929129464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3899.0, "completions/mean_length": 873.3125610351562, "completions/mean_terminated_length": 543.2125244140625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.15860058309038, "grad_norm": 0.21148379147052765, "learning_rate": 1e-06, "loss": -0.0743, "num_tokens": 493531111.0, "reward": 0.6808035969734192, "reward_std": 0.15019066631793976, "rewards/simpleverify_reward/mean": 0.6808035969734192, "rewards/simpleverify_reward/std": 0.4662303328514099, "step": 849 }, { "clip_ratio/high_max": 0.001777424018655438, "clip_ratio/high_mean": 0.0007133043600333622, "clip_ratio/low_mean": 0.0005141597112015006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012274640539544635, "epoch": 8.167930029154519, "grad_norm": 0.2077813744544983, "learning_rate": 1e-06, "loss": -0.0395, "step": 850 }, { "clip_ratio/high_max": 0.001603165870619705, "clip_ratio/high_mean": 0.0006312956793408375, "clip_ratio/low_mean": 0.0004770444666064577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001108340140490327, "epoch": 8.177259475218658, "grad_norm": 0.22171589732170105, "learning_rate": 1e-06, "loss": -0.0649, "step": 851 }, { "clip_ratio/high_max": 0.0017332502393401228, "clip_ratio/high_mean": 0.000644186088720744, "clip_ratio/low_mean": 0.00044984264286540565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010940287284029182, "epoch": 8.186588921282798, "grad_norm": 0.1992146223783493, "learning_rate": 1e-06, "loss": -0.0404, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0884486607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 890.2879638671875, "completions/mean_terminated_length": 579.2347412109375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 8.19591836734694, "grad_norm": 0.19308605790138245, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 495798039.0, "reward": 0.658761203289032, "reward_std": 0.13452884554862976, "rewards/simpleverify_reward/mean": 0.6587611436843872, "rewards/simpleverify_reward/std": 0.4741915464401245, "step": 853 }, { "clip_ratio/high_max": 0.0014049238307052292, "clip_ratio/high_mean": 0.0005261294913907477, "clip_ratio/low_mean": 0.0003645370115918922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008906664934329456, "epoch": 8.205247813411079, "grad_norm": 0.1875889152288437, "learning_rate": 1e-06, "loss": -0.0619, "step": 854 }, { "clip_ratio/high_max": 0.001746064477629261, "clip_ratio/high_mean": 0.0005877011153643252, "clip_ratio/low_mean": 0.00042652550155253266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010142266510229092, "epoch": 8.214577259475218, "grad_norm": 0.2389635443687439, "learning_rate": 1e-06, "loss": 0.0073, "step": 855 }, { "clip_ratio/high_max": 0.001883198714494938, "clip_ratio/high_mean": 0.0005970188949504518, "clip_ratio/low_mean": 0.0003998544134447002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009968733120331308, "epoch": 8.223906705539358, "grad_norm": 0.2119031399488449, "learning_rate": 1e-06, "loss": -0.034, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0834263392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 853.804443359375, "completions/mean_terminated_length": 558.7005004882812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.2332361516035, "grad_norm": 0.20811808109283447, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 498004818.0, "reward": 0.645089328289032, "reward_std": 0.14186622202396393, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.47855302691459656, "step": 857 }, { "clip_ratio/high_max": 0.0016370575358450878, "clip_ratio/high_mean": 0.0006246589064176078, "clip_ratio/low_mean": 0.0004614503295670147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010861092487175483, "epoch": 8.242565597667639, "grad_norm": 0.2095000296831131, "learning_rate": 1e-06, "loss": 0.0083, "step": 858 }, { "clip_ratio/high_max": 0.0018745159250102006, "clip_ratio/high_mean": 0.0007075621688272804, "clip_ratio/low_mean": 0.0003995686847702018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011071308581449557, "epoch": 8.251895043731778, "grad_norm": 0.19211915135383606, "learning_rate": 1e-06, "loss": -0.0594, "step": 859 }, { "clip_ratio/high_max": 0.001863425039118738, "clip_ratio/high_mean": 0.0007910383328635362, "clip_ratio/low_mean": 0.0004682701246565557, "clip_ratio/low_min": 1.651691309234593e-05, "clip_ratio/region_mean": 0.0012593084611580707, "epoch": 8.261224489795918, "grad_norm": 0.20979106426239014, "learning_rate": 1e-06, "loss": -0.0161, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 850.241943359375, "completions/mean_terminated_length": 560.1954345703125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 8.270553935860057, "grad_norm": 0.2153846174478531, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 500210621.0, "reward": 0.6718750596046448, "reward_std": 0.15318308770656586, "rewards/simpleverify_reward/mean": 0.671875, "rewards/simpleverify_reward/std": 0.46959611773490906, "step": 861 }, { "clip_ratio/high_max": 0.0020156478640274145, "clip_ratio/high_mean": 0.0007846157714084256, "clip_ratio/low_mean": 0.0004615660091076279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012461817568691913, "epoch": 8.279883381924199, "grad_norm": 0.19815219938755035, "learning_rate": 1e-06, "loss": -0.0642, "step": 862 }, { "clip_ratio/high_max": 0.0018157772574340925, "clip_ratio/high_mean": 0.0007722268946963595, "clip_ratio/low_mean": 0.00043873078993783565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012109576782677323, "epoch": 8.289212827988338, "grad_norm": 0.23526006937026978, "learning_rate": 1e-06, "loss": -0.029, "step": 863 }, { "clip_ratio/high_max": 0.0020821000980504323, "clip_ratio/high_mean": 0.0008129849666147493, "clip_ratio/low_mean": 0.0005491211222761194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013621060716104694, "epoch": 8.298542274052478, "grad_norm": 0.2534906566143036, "learning_rate": 1e-06, "loss": -0.0302, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0851004464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3654.0, "completions/mean_length": 850.0195922851562, "completions/mean_terminated_length": 548.0908813476562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 8.307871720116617, "grad_norm": 0.21729153394699097, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 502371819.0, "reward": 0.6693638563156128, "reward_std": 0.14354710280895233, "rewards/simpleverify_reward/mean": 0.6693638563156128, "rewards/simpleverify_reward/std": 0.4705078601837158, "step": 865 }, { "clip_ratio/high_max": 0.001930144353536889, "clip_ratio/high_mean": 0.0007256315402628388, "clip_ratio/low_mean": 0.00048087449295053375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012065060436725616, "epoch": 8.317201166180759, "grad_norm": 0.20141641795635223, "learning_rate": 1e-06, "loss": -0.07, "step": 866 }, { "clip_ratio/high_max": 0.0016413264311267994, "clip_ratio/high_mean": 0.000656519971016678, "clip_ratio/low_mean": 0.0003180721996614011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009745921634021215, "epoch": 8.326530612244898, "grad_norm": 0.18424518406391144, "learning_rate": 1e-06, "loss": -0.0119, "step": 867 }, { "clip_ratio/high_max": 0.0019865067733917385, "clip_ratio/high_mean": 0.0007619883199367905, "clip_ratio/low_mean": 0.0005698706900147954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013318590135895647, "epoch": 8.335860058309038, "grad_norm": 0.22060516476631165, "learning_rate": 1e-06, "loss": -0.012, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0795200892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 842.3680419921875, "completions/mean_terminated_length": 561.287353515625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 8.345189504373177, "grad_norm": 0.20909646153450012, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 504564994.0, "reward": 0.6674107313156128, "reward_std": 0.1463107466697693, "rewards/simpleverify_reward/mean": 0.6674107313156128, "rewards/simpleverify_reward/std": 0.47120651602745056, "step": 869 }, { "clip_ratio/high_max": 0.0018018705159192905, "clip_ratio/high_mean": 0.000775587204771, "clip_ratio/low_mean": 0.00044280520705797244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012183924154669512, "epoch": 8.354518950437317, "grad_norm": 0.17816588282585144, "learning_rate": 1e-06, "loss": -0.0223, "step": 870 }, { "clip_ratio/high_max": 0.0019693659487529658, "clip_ratio/high_mean": 0.000868167702719802, "clip_ratio/low_mean": 0.00043021021792810643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012983778942725621, "epoch": 8.363848396501458, "grad_norm": 0.21286091208457947, "learning_rate": 1e-06, "loss": -0.0465, "step": 871 }, { "clip_ratio/high_max": 0.00183451701013837, "clip_ratio/high_mean": 0.000654515510177589, "clip_ratio/low_mean": 0.0005379623089538654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011924778191314545, "epoch": 8.373177842565598, "grad_norm": 0.2436532974243164, "learning_rate": 1e-06, "loss": -0.0085, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0784040178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 808.1470947265625, "completions/mean_terminated_length": 528.4356689453125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 8.382507288629737, "grad_norm": 0.227778822183609, "learning_rate": 1e-06, "loss": -0.0381, "num_tokens": 506660657.0, "reward": 0.6986607313156128, "reward_std": 0.141133114695549, "rewards/simpleverify_reward/mean": 0.6986607313156128, "rewards/simpleverify_reward/std": 0.45890378952026367, "step": 873 }, { "clip_ratio/high_max": 0.0021021136199124157, "clip_ratio/high_mean": 0.0007437052481691353, "clip_ratio/low_mean": 0.0003733414478119812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011170467078045476, "epoch": 8.391836734693877, "grad_norm": 0.22037145495414734, "learning_rate": 1e-06, "loss": -0.0629, "step": 874 }, { "clip_ratio/high_max": 0.0019187404905096628, "clip_ratio/high_mean": 0.0006844189083494712, "clip_ratio/low_mean": 0.00040222005281975726, "clip_ratio/low_min": 1.1783559784817044e-05, "clip_ratio/region_mean": 0.001086638974811649, "epoch": 8.401166180758018, "grad_norm": 0.2044508457183838, "learning_rate": 1e-06, "loss": -0.0214, "step": 875 }, { "clip_ratio/high_max": 0.002168182749301195, "clip_ratio/high_mean": 0.0008433329912804766, "clip_ratio/low_mean": 0.00043435764109744923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012776906405633781, "epoch": 8.410495626822158, "grad_norm": 0.19025807082653046, "learning_rate": 1e-06, "loss": -0.0276, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0744977678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 815.0315551757812, "completions/mean_terminated_length": 550.931884765625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.419825072886297, "grad_norm": 0.21105901896953583, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 508836578.0, "reward": 0.7198660969734192, "reward_std": 0.12055040150880814, "rewards/simpleverify_reward/mean": 0.7198660969734192, "rewards/simpleverify_reward/std": 0.4491271674633026, "step": 877 }, { "clip_ratio/high_max": 0.0017100308723456692, "clip_ratio/high_mean": 0.0006550383950525429, "clip_ratio/low_mean": 0.00029400652056210674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009490448956057662, "epoch": 8.429154518950437, "grad_norm": 0.16956418752670288, "learning_rate": 1e-06, "loss": -0.0577, "step": 878 }, { "clip_ratio/high_max": 0.0016755704418756068, "clip_ratio/high_mean": 0.0006167489118524827, "clip_ratio/low_mean": 0.0004148780599280144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001031626950862119, "epoch": 8.438483965014576, "grad_norm": 0.20543578267097473, "learning_rate": 1e-06, "loss": -0.0022, "step": 879 }, { "clip_ratio/high_max": 0.00184726412044256, "clip_ratio/high_mean": 0.0007297933516383637, "clip_ratio/low_mean": 0.0003036935395357432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010334869220969267, "epoch": 8.447813411078718, "grad_norm": 0.17861880362033844, "learning_rate": 1e-06, "loss": -0.0671, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 866.765380859375, "completions/mean_terminated_length": 558.84326171875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 8.457142857142857, "grad_norm": 0.24390752613544464, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 511015577.0, "reward": 0.6618303656578064, "reward_std": 0.15180666744709015, "rewards/simpleverify_reward/mean": 0.6618303656578064, "rewards/simpleverify_reward/std": 0.47315260767936707, "step": 881 }, { "clip_ratio/high_max": 0.0016176038116100244, "clip_ratio/high_mean": 0.000625743115961086, "clip_ratio/low_mean": 0.0006201532296472578, "clip_ratio/low_min": 3.0975835215940606e-05, "clip_ratio/region_mean": 0.0012458963210519869, "epoch": 8.466472303206997, "grad_norm": 0.19544947147369385, "learning_rate": 1e-06, "loss": -0.033, "step": 882 }, { "clip_ratio/high_max": 0.0021114897026563995, "clip_ratio/high_mean": 0.000817813091998687, "clip_ratio/low_mean": 0.0005196727765905962, "clip_ratio/low_min": 2.2653135602013208e-05, "clip_ratio/region_mean": 0.0013374859008763451, "epoch": 8.475801749271136, "grad_norm": 0.21922613680362701, "learning_rate": 1e-06, "loss": -0.0482, "step": 883 }, { "clip_ratio/high_max": 0.0022333392334985547, "clip_ratio/high_mean": 0.0008119487647491042, "clip_ratio/low_mean": 0.0006113037475188321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014232525463739876, "epoch": 8.485131195335278, "grad_norm": 0.2235177904367447, "learning_rate": 1e-06, "loss": -0.0245, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0809151785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 833.0653076171875, "completions/mean_terminated_length": 545.8002319335938, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 8.494460641399417, "grad_norm": 0.2312912940979004, "learning_rate": 1e-06, "loss": -0.0283, "num_tokens": 513182619.0, "reward": 0.643136203289032, "reward_std": 0.11885228008031845, "rewards/simpleverify_reward/mean": 0.6431361436843872, "rewards/simpleverify_reward/std": 0.4791409969329834, "step": 885 }, { "clip_ratio/high_max": 0.0014540614211000502, "clip_ratio/high_mean": 0.00044244544551474974, "clip_ratio/low_mean": 0.0003385451173016918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007809905600879574, "epoch": 8.503790087463557, "grad_norm": 0.19003519415855408, "learning_rate": 1e-06, "loss": -0.0165, "step": 886 }, { "clip_ratio/high_max": 0.0022952827021072153, "clip_ratio/high_mean": 0.000690235816364293, "clip_ratio/low_mean": 0.0005100466555632011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012002824623777997, "epoch": 8.513119533527696, "grad_norm": 0.18900182843208313, "learning_rate": 1e-06, "loss": -0.0264, "step": 887 }, { "clip_ratio/high_max": 0.0013909939698351081, "clip_ratio/high_mean": 0.0005425814642876503, "clip_ratio/low_mean": 0.0004578366251735133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001000418082185206, "epoch": 8.522448979591836, "grad_norm": 0.26237261295318604, "learning_rate": 1e-06, "loss": -0.0312, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0786830357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 838.0792846679688, "completions/mean_terminated_length": 559.8436889648438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.531778425655977, "grad_norm": 0.19223347306251526, "learning_rate": 1e-06, "loss": -0.042, "num_tokens": 515398367.0, "reward": 0.6640625, "reward_std": 0.1287159025669098, "rewards/simpleverify_reward/mean": 0.6640625, "rewards/simpleverify_reward/std": 0.472383052110672, "step": 889 }, { "clip_ratio/high_max": 0.0019934093070332892, "clip_ratio/high_mean": 0.0007008639095147373, "clip_ratio/low_mean": 0.0003778454902203521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010787093851831742, "epoch": 8.541107871720117, "grad_norm": 0.1899620145559311, "learning_rate": 1e-06, "loss": -0.0512, "step": 890 }, { "clip_ratio/high_max": 0.0018079662950185593, "clip_ratio/high_mean": 0.0006150932467789971, "clip_ratio/low_mean": 0.0004010315965388145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00101612485559599, "epoch": 8.550437317784256, "grad_norm": 0.20196662843227386, "learning_rate": 1e-06, "loss": -0.0424, "step": 891 }, { "clip_ratio/high_max": 0.0016551250373595394, "clip_ratio/high_mean": 0.0006170452033984475, "clip_ratio/low_mean": 0.00039987060972634936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010169158122153021, "epoch": 8.559766763848396, "grad_norm": 0.29652684926986694, "learning_rate": 1e-06, "loss": -0.0501, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 843.136474609375, "completions/mean_terminated_length": 554.6078491210938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.569096209912537, "grad_norm": 0.2309209555387497, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 517584768.0, "reward": 0.6593192219734192, "reward_std": 0.14020207524299622, "rewards/simpleverify_reward/mean": 0.6593192219734192, "rewards/simpleverify_reward/std": 0.4740042984485626, "step": 893 }, { "clip_ratio/high_max": 0.0017110658709498239, "clip_ratio/high_mean": 0.0006515546533591987, "clip_ratio/low_mean": 0.00033966197315749014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009912165933201322, "epoch": 8.578425655976677, "grad_norm": 0.19787409901618958, "learning_rate": 1e-06, "loss": -0.0429, "step": 894 }, { "clip_ratio/high_max": 0.00191219736734638, "clip_ratio/high_mean": 0.0006868910986668197, "clip_ratio/low_mean": 0.0005384320215853222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012253231179784052, "epoch": 8.587755102040816, "grad_norm": 0.21518060564994812, "learning_rate": 1e-06, "loss": -0.0108, "step": 895 }, { "clip_ratio/high_max": 0.0017851503725978546, "clip_ratio/high_mean": 0.0006872485446365317, "clip_ratio/low_mean": 0.0004837185801989108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001170967112557264, "epoch": 8.597084548104956, "grad_norm": 0.1901208907365799, "learning_rate": 1e-06, "loss": -0.0397, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3620.0, "completions/mean_length": 859.566162109375, "completions/mean_terminated_length": 537.9082641601562, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 8.606413994169095, "grad_norm": 0.19888818264007568, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 519693645.0, "reward": 0.6679688096046448, "reward_std": 0.1378258913755417, "rewards/simpleverify_reward/mean": 0.66796875, "rewards/simpleverify_reward/std": 0.4710078537464142, "step": 897 }, { "clip_ratio/high_max": 0.001610101982805645, "clip_ratio/high_mean": 0.0006402952176358667, "clip_ratio/low_mean": 0.0003704963573909481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001010791553198942, "epoch": 8.615743440233237, "grad_norm": 0.2331157624721527, "learning_rate": 1e-06, "loss": -0.0675, "step": 898 }, { "clip_ratio/high_max": 0.0018110648306901567, "clip_ratio/high_mean": 0.0006485739013442071, "clip_ratio/low_mean": 0.0004158576020927285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010644314752425998, "epoch": 8.625072886297376, "grad_norm": 0.2172631323337555, "learning_rate": 1e-06, "loss": -0.0229, "step": 899 }, { "clip_ratio/high_max": 0.0018914249740191735, "clip_ratio/high_mean": 0.0006804288295825245, "clip_ratio/low_mean": 0.0005056909467384685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011861197563121095, "epoch": 8.634402332361516, "grad_norm": 0.24722664058208466, "learning_rate": 1e-06, "loss": -0.0441, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1007254464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3560.0, "completions/mean_length": 904.2232666015625, "completions/mean_terminated_length": 546.720458984375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 8.643731778425655, "grad_norm": 0.22908325493335724, "learning_rate": 1e-06, "loss": -0.0236, "num_tokens": 521815901.0, "reward": 0.6456473469734192, "reward_std": 0.1361803114414215, "rewards/simpleverify_reward/mean": 0.6456473469734192, "rewards/simpleverify_reward/std": 0.4783834218978882, "step": 901 }, { "clip_ratio/high_max": 0.001612004343769513, "clip_ratio/high_mean": 0.0005772955737484153, "clip_ratio/low_mean": 0.0005040764972363831, "clip_ratio/low_min": 1.803751729312353e-05, "clip_ratio/region_mean": 0.0010813720728037879, "epoch": 8.653061224489797, "grad_norm": 0.19320201873779297, "learning_rate": 1e-06, "loss": -0.0337, "step": 902 }, { "clip_ratio/high_max": 0.0022080804665165488, "clip_ratio/high_mean": 0.0006840811129222857, "clip_ratio/low_mean": 0.0005215443966335442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012056254854542203, "epoch": 8.662390670553936, "grad_norm": 0.25473472476005554, "learning_rate": 1e-06, "loss": -0.0408, "step": 903 }, { "clip_ratio/high_max": 0.0018996338876604568, "clip_ratio/high_mean": 0.0006457903709815582, "clip_ratio/low_mean": 0.0005678676752722822, "clip_ratio/low_min": 1.1745912161131855e-05, "clip_ratio/region_mean": 0.0012136580880905967, "epoch": 8.671720116618076, "grad_norm": 0.1797763556241989, "learning_rate": 1e-06, "loss": -0.0685, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0979352678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 918.757568359375, "completions/mean_terminated_length": 573.81103515625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.681049562682215, "grad_norm": 0.1975695639848709, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 524033088.0, "reward": 0.6517857313156128, "reward_std": 0.1338600218296051, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47647082805633545, "step": 905 }, { "clip_ratio/high_max": 0.0017009891198540572, "clip_ratio/high_mean": 0.0005709198258045944, "clip_ratio/low_mean": 0.0004454967865967774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010164166014874354, "epoch": 8.690379008746355, "grad_norm": 0.1849793940782547, "learning_rate": 1e-06, "loss": -0.0414, "step": 906 }, { "clip_ratio/high_max": 0.0018794510178850032, "clip_ratio/high_mean": 0.00066065495411749, "clip_ratio/low_mean": 0.0003743076040336746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010349625499657122, "epoch": 8.699708454810496, "grad_norm": 0.18855977058410645, "learning_rate": 1e-06, "loss": -0.0596, "step": 907 }, { "clip_ratio/high_max": 0.0018072333514282946, "clip_ratio/high_mean": 0.0006520048755191965, "clip_ratio/low_mean": 0.0004902902392132091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011422951429267414, "epoch": 8.709037900874636, "grad_norm": 0.5042897462844849, "learning_rate": 1e-06, "loss": 0.0087, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.103515625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 931.61279296875, "completions/mean_terminated_length": 566.2259521484375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 8.718367346938775, "grad_norm": 0.2388840913772583, "learning_rate": 1e-06, "loss": -0.0655, "num_tokens": 526194804.0, "reward": 0.6540178656578064, "reward_std": 0.12421358376741409, "rewards/simpleverify_reward/mean": 0.6540178656578064, "rewards/simpleverify_reward/std": 0.4757537841796875, "step": 909 }, { "clip_ratio/high_max": 0.0016156836718437262, "clip_ratio/high_mean": 0.0005781957879662514, "clip_ratio/low_mean": 0.00041708146727614803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009952772252290742, "epoch": 8.727696793002915, "grad_norm": 0.18638484179973602, "learning_rate": 1e-06, "loss": -0.0327, "step": 910 }, { "clip_ratio/high_max": 0.0018071288141072728, "clip_ratio/high_mean": 0.0006696414857287891, "clip_ratio/low_mean": 0.000423831934313057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010934734345937613, "epoch": 8.737026239067056, "grad_norm": 0.19814637303352356, "learning_rate": 1e-06, "loss": -0.0365, "step": 911 }, { "clip_ratio/high_max": 0.0014550860068993643, "clip_ratio/high_mean": 0.0004926396741211647, "clip_ratio/low_mean": 0.0004994988175894832, "clip_ratio/low_min": 1.0368281436967663e-05, "clip_ratio/region_mean": 0.0009921384771587327, "epoch": 8.746355685131196, "grad_norm": 0.19484324753284454, "learning_rate": 1e-06, "loss": 0.0114, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0934709821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 887.5301513671875, "completions/mean_terminated_length": 556.7091064453125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.755685131195335, "grad_norm": 0.2104308307170868, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 528361096.0, "reward": 0.6838728189468384, "reward_std": 0.13593780994415283, "rewards/simpleverify_reward/mean": 0.6838727593421936, "rewards/simpleverify_reward/std": 0.4650281071662903, "step": 913 }, { "clip_ratio/high_max": 0.001971578378288541, "clip_ratio/high_mean": 0.0007190934065874899, "clip_ratio/low_mean": 0.0004007334159723541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011198268621228635, "epoch": 8.765014577259475, "grad_norm": 0.18971562385559082, "learning_rate": 1e-06, "loss": -0.0483, "step": 914 }, { "clip_ratio/high_max": 0.001914344604301732, "clip_ratio/high_mean": 0.0007972441653691931, "clip_ratio/low_mean": 0.00042155920982622774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012188033797428943, "epoch": 8.774344023323614, "grad_norm": 0.2506207823753357, "learning_rate": 1e-06, "loss": -0.0853, "step": 915 }, { "clip_ratio/high_max": 0.001900760249554878, "clip_ratio/high_mean": 0.0006886893097544089, "clip_ratio/low_mean": 0.00047243086373782717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011611201698542573, "epoch": 8.783673469387756, "grad_norm": 0.2095375657081604, "learning_rate": 1e-06, "loss": -0.0141, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3567.0, "completions/mean_length": 826.2994384765625, "completions/mean_terminated_length": 557.7732543945312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 8.793002915451895, "grad_norm": 0.19115404784679413, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 530548329.0, "reward": 0.6908482313156128, "reward_std": 0.1253427118062973, "rewards/simpleverify_reward/mean": 0.6908482313156128, "rewards/simpleverify_reward/std": 0.4622083604335785, "step": 917 }, { "clip_ratio/high_max": 0.001460242234315956, "clip_ratio/high_mean": 0.0005861380450369325, "clip_ratio/low_mean": 0.00030831781327833596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008944558794610202, "epoch": 8.802332361516035, "grad_norm": 0.16928203403949738, "learning_rate": 1e-06, "loss": -0.0457, "step": 918 }, { "clip_ratio/high_max": 0.0015419522896991111, "clip_ratio/high_mean": 0.0005793428044853499, "clip_ratio/low_mean": 0.0003970258503613877, "clip_ratio/low_min": 1.7334627045784146e-05, "clip_ratio/region_mean": 0.0009763686466612853, "epoch": 8.811661807580174, "grad_norm": 0.19915778934955597, "learning_rate": 1e-06, "loss": -0.0314, "step": 919 }, { "clip_ratio/high_max": 0.001817022177419858, "clip_ratio/high_mean": 0.0007430787900375435, "clip_ratio/low_mean": 0.0004013681609649211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001144446898251772, "epoch": 8.820991253644316, "grad_norm": 0.6057918071746826, "learning_rate": 1e-06, "loss": -0.0628, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0890066964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 874.5326538085938, "completions/mean_terminated_length": 559.785888671875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.830320699708455, "grad_norm": 0.23311445116996765, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 532716286.0, "reward": 0.6612723469734192, "reward_std": 0.13962063193321228, "rewards/simpleverify_reward/mean": 0.6612723469734192, "rewards/simpleverify_reward/std": 0.4733431935310364, "step": 921 }, { "clip_ratio/high_max": 0.0014807758961978834, "clip_ratio/high_mean": 0.0006396386970664025, "clip_ratio/low_mean": 0.00045767264691676246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010973113567160908, "epoch": 8.839650145772595, "grad_norm": 0.3852095901966095, "learning_rate": 1e-06, "loss": -0.0118, "step": 922 }, { "clip_ratio/high_max": 0.0018937466229544953, "clip_ratio/high_mean": 0.0006836644115537638, "clip_ratio/low_mean": 0.00039433195161109325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010779963668028358, "epoch": 8.848979591836734, "grad_norm": 0.22147031128406525, "learning_rate": 1e-06, "loss": -0.0721, "step": 923 }, { "clip_ratio/high_max": 0.0017295691068284214, "clip_ratio/high_mean": 0.0006488448889285792, "clip_ratio/low_mean": 0.0004855770459926134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011344219237798825, "epoch": 8.858309037900874, "grad_norm": 0.2310793250799179, "learning_rate": 1e-06, "loss": -0.0257, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1007254464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 917.6641235351562, "completions/mean_terminated_length": 561.6668090820312, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 8.867638483965015, "grad_norm": 0.19594378769397736, "learning_rate": 1e-06, "loss": -0.029, "num_tokens": 534887450.0, "reward": 0.6512277126312256, "reward_std": 0.1271919310092926, "rewards/simpleverify_reward/mean": 0.6512276530265808, "rewards/simpleverify_reward/std": 0.4766482710838318, "step": 925 }, { "clip_ratio/high_max": 0.0016192635339393746, "clip_ratio/high_mean": 0.000589761096307484, "clip_ratio/low_mean": 0.0003003119545610389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008900730481400387, "epoch": 8.876967930029155, "grad_norm": 0.14752085506916046, "learning_rate": 1e-06, "loss": -0.0564, "step": 926 }, { "clip_ratio/high_max": 0.0014255113383114804, "clip_ratio/high_mean": 0.0005828959856444271, "clip_ratio/low_mean": 0.0004833077518924256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010662037420843262, "epoch": 8.886297376093294, "grad_norm": 0.20855985581874847, "learning_rate": 1e-06, "loss": -0.0449, "step": 927 }, { "clip_ratio/high_max": 0.0021094378716952633, "clip_ratio/high_mean": 0.0006853170661997865, "clip_ratio/low_mean": 0.00048066431509141694, "clip_ratio/low_min": 1.5056612937769387e-05, "clip_ratio/region_mean": 0.0011659813681035303, "epoch": 8.895626822157434, "grad_norm": 0.17203618586063385, "learning_rate": 1e-06, "loss": -0.0354, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 878.84326171875, "completions/mean_terminated_length": 550.4003295898438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.904956268221575, "grad_norm": 0.28298455476760864, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 537033712.0, "reward": 0.6791294813156128, "reward_std": 0.12845245003700256, "rewards/simpleverify_reward/mean": 0.6791294813156128, "rewards/simpleverify_reward/std": 0.4668762683868408, "step": 929 }, { "clip_ratio/high_max": 0.0015493040591536555, "clip_ratio/high_mean": 0.0005798207748739514, "clip_ratio/low_mean": 0.00033244758901673777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009122683786699781, "epoch": 8.914285714285715, "grad_norm": 0.21884870529174805, "learning_rate": 1e-06, "loss": -0.0338, "step": 930 }, { "clip_ratio/high_max": 0.0017000906300381757, "clip_ratio/high_mean": 0.0006038021110725822, "clip_ratio/low_mean": 0.0003833929526990687, "clip_ratio/low_min": 1.5458817870239727e-05, "clip_ratio/region_mean": 0.000987195098787197, "epoch": 8.923615160349854, "grad_norm": 0.21116183698177338, "learning_rate": 1e-06, "loss": -0.0393, "step": 931 }, { "clip_ratio/high_max": 0.001813647471863078, "clip_ratio/high_mean": 0.0006087573838158278, "clip_ratio/low_mean": 0.0004114027524337871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001020160132611636, "epoch": 8.932944606413994, "grad_norm": 0.16516272723674774, "learning_rate": 1e-06, "loss": -0.0436, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1188616071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 996.0449829101562, "completions/mean_terminated_length": 577.8749389648438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 8.942274052478133, "grad_norm": 0.20127227902412415, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 539233497.0, "reward": 0.6395089626312256, "reward_std": 0.13693320751190186, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.48020994663238525, "step": 933 }, { "clip_ratio/high_max": 0.0018692240919335745, "clip_ratio/high_mean": 0.0006784572524338728, "clip_ratio/low_mean": 0.00042697354865595116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011054308015445713, "epoch": 8.951603498542275, "grad_norm": 0.1814921349287033, "learning_rate": 1e-06, "loss": -0.0189, "step": 934 }, { "clip_ratio/high_max": 0.0017994212503253948, "clip_ratio/high_mean": 0.0005892555172977154, "clip_ratio/low_mean": 0.0004341687067608291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010234242254227865, "epoch": 8.960932944606414, "grad_norm": 0.2371896654367447, "learning_rate": 1e-06, "loss": -0.0491, "step": 935 }, { "clip_ratio/high_max": 0.002165219462767709, "clip_ratio/high_mean": 0.0007692927447351394, "clip_ratio/low_mean": 0.0004176553015895479, "clip_ratio/low_min": 1.544735459901858e-05, "clip_ratio/region_mean": 0.001186948054964887, "epoch": 8.970262390670554, "grad_norm": 0.19762632250785828, "learning_rate": 1e-06, "loss": -0.0675, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0943080357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 894.388427734375, "completions/mean_terminated_length": 561.0104370117188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.00932944606414, "grad_norm": 0.21884441375732422, "learning_rate": 1e-06, "loss": -0.0873, "num_tokens": 541419801.0, "reward": 0.6690848469734192, "reward_std": 0.14366835355758667, "rewards/simpleverify_reward/mean": 0.6690848469734192, "rewards/simpleverify_reward/std": 0.4706082344055176, "step": 937 }, { "clip_ratio/high_max": 0.0016857492337294389, "clip_ratio/high_mean": 0.0005917834914725972, "clip_ratio/low_mean": 0.00046555788549085264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010573413928796072, "epoch": 9.018658892128279, "grad_norm": 0.25064513087272644, "learning_rate": 1e-06, "loss": 0.0112, "step": 938 }, { "clip_ratio/high_max": 0.0016941579251579242, "clip_ratio/high_mean": 0.0006363156267070735, "clip_ratio/low_mean": 0.0005123020291648572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011486176372272894, "epoch": 9.02798833819242, "grad_norm": 0.2315865308046341, "learning_rate": 1e-06, "loss": -0.0205, "step": 939 }, { "clip_ratio/high_max": 0.0017965437582461163, "clip_ratio/high_mean": 0.00074233306440874, "clip_ratio/low_mean": 0.0004159017744314042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011582348452066071, "epoch": 9.03731778425656, "grad_norm": 0.24782533943653107, "learning_rate": 1e-06, "loss": -0.0526, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 888.9883422851562, "completions/mean_terminated_length": 561.5811767578125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.0466472303207, "grad_norm": 0.19276177883148193, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 543613487.0, "reward": 0.6682478189468384, "reward_std": 0.1308584064245224, "rewards/simpleverify_reward/mean": 0.6682477593421936, "rewards/simpleverify_reward/std": 0.4709082245826721, "step": 941 }, { "clip_ratio/high_max": 0.00253118514228845, "clip_ratio/high_mean": 0.0008279092162410961, "clip_ratio/low_mean": 0.000364655115390633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001192564370285254, "epoch": 9.055976676384839, "grad_norm": 0.18487882614135742, "learning_rate": 1e-06, "loss": -0.0153, "step": 942 }, { "clip_ratio/high_max": 0.0019025085603061598, "clip_ratio/high_mean": 0.0007506451420340454, "clip_ratio/low_mean": 0.00045272684633346216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012033719958708389, "epoch": 9.06530612244898, "grad_norm": 0.21149462461471558, "learning_rate": 1e-06, "loss": -0.0267, "step": 943 }, { "clip_ratio/high_max": 0.0017848788993433118, "clip_ratio/high_mean": 0.0006586623803741531, "clip_ratio/low_mean": 0.0003880248809764453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010466872481629252, "epoch": 9.07463556851312, "grad_norm": 0.19536608457565308, "learning_rate": 1e-06, "loss": -0.0471, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 891.8245239257812, "completions/mean_terminated_length": 569.045166015625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 9.08396501457726, "grad_norm": 0.23016257584095, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 545822866.0, "reward": 0.656808078289032, "reward_std": 0.14722926914691925, "rewards/simpleverify_reward/mean": 0.6568080186843872, "rewards/simpleverify_reward/std": 0.4748412072658539, "step": 945 }, { "clip_ratio/high_max": 0.0020944806510669878, "clip_ratio/high_mean": 0.000835233070574759, "clip_ratio/low_mean": 0.0004925223790905875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013277554462547414, "epoch": 9.093294460641399, "grad_norm": 0.19775530695915222, "learning_rate": 1e-06, "loss": -0.0781, "step": 946 }, { "clip_ratio/high_max": 0.0020655479092965834, "clip_ratio/high_mean": 0.0007818891826900654, "clip_ratio/low_mean": 0.00043350953092158306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012153986972407438, "epoch": 9.102623906705539, "grad_norm": 0.23533985018730164, "learning_rate": 1e-06, "loss": -0.0427, "step": 947 }, { "clip_ratio/high_max": 0.0018016465692198835, "clip_ratio/high_mean": 0.0007574404589831829, "clip_ratio/low_mean": 0.0005633579621644458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013207983756728936, "epoch": 9.11195335276968, "grad_norm": 0.2067180871963501, "learning_rate": 1e-06, "loss": -0.0284, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3877.0, "completions/mean_length": 932.954833984375, "completions/mean_terminated_length": 557.8120727539062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.12128279883382, "grad_norm": 0.2198241800069809, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 547978784.0, "reward": 0.646484375, "reward_std": 0.1458766907453537, "rewards/simpleverify_reward/mean": 0.646484375, "rewards/simpleverify_reward/std": 0.47812771797180176, "step": 949 }, { "clip_ratio/high_max": 0.0021449292653414886, "clip_ratio/high_mean": 0.0008235534605773864, "clip_ratio/low_mean": 0.000371657890809729, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011952113563893363, "epoch": 9.130612244897959, "grad_norm": 0.40236303210258484, "learning_rate": 1e-06, "loss": -0.0344, "step": 950 }, { "clip_ratio/high_max": 0.002021735064772656, "clip_ratio/high_mean": 0.0007746630590190762, "clip_ratio/low_mean": 0.00038743004188290797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011620930899880477, "epoch": 9.139941690962099, "grad_norm": 0.22769886255264282, "learning_rate": 1e-06, "loss": -0.0541, "step": 951 }, { "clip_ratio/high_max": 0.0018622556854097638, "clip_ratio/high_mean": 0.0007072714697642368, "clip_ratio/low_mean": 0.0002957687536309095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010030402318079723, "epoch": 9.14927113702624, "grad_norm": 0.2218213975429535, "learning_rate": 1e-06, "loss": -0.053, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0823102678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 837.019287109375, "completions/mean_terminated_length": 544.7117919921875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 9.15860058309038, "grad_norm": 0.2679688334465027, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 550123773.0, "reward": 0.6863839626312256, "reward_std": 0.14367155730724335, "rewards/simpleverify_reward/mean": 0.6863839030265808, "rewards/simpleverify_reward/std": 0.4640270471572876, "step": 953 }, { "clip_ratio/high_max": 0.0016905256343306974, "clip_ratio/high_mean": 0.0006417210897780024, "clip_ratio/low_mean": 0.0004172611470494303, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001058982254107832, "epoch": 9.167930029154519, "grad_norm": 0.27796509861946106, "learning_rate": 1e-06, "loss": -0.0286, "step": 954 }, { "clip_ratio/high_max": 0.0019863716152030975, "clip_ratio/high_mean": 0.0007600550743518397, "clip_ratio/low_mean": 0.0002610403560083796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010210954405920347, "epoch": 9.177259475218658, "grad_norm": 4.136716365814209, "learning_rate": 1e-06, "loss": -0.083, "step": 955 }, { "clip_ratio/high_max": 0.002002372231800109, "clip_ratio/high_mean": 0.0008015406747290399, "clip_ratio/low_mean": 0.0003776885438355748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011792292152676964, "epoch": 9.186588921282798, "grad_norm": 0.2038593739271164, "learning_rate": 1e-06, "loss": -0.0754, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3586.0, "completions/mean_length": 879.6621704101562, "completions/mean_terminated_length": 555.6575317382812, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 9.19591836734694, "grad_norm": 0.2013709992170334, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 552279458.0, "reward": 0.6833147406578064, "reward_std": 0.13015000522136688, "rewards/simpleverify_reward/mean": 0.6833147406578064, "rewards/simpleverify_reward/std": 0.4652484357357025, "step": 957 }, { "clip_ratio/high_max": 0.00181918301677797, "clip_ratio/high_mean": 0.0006394721367541933, "clip_ratio/low_mean": 0.0003771602873712254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010166323954763357, "epoch": 9.205247813411079, "grad_norm": 0.25102630257606506, "learning_rate": 1e-06, "loss": -0.0132, "step": 958 }, { "clip_ratio/high_max": 0.0015308111796912272, "clip_ratio/high_mean": 0.0005999678833177313, "clip_ratio/low_mean": 0.0003734915246695891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009734594204928726, "epoch": 9.214577259475218, "grad_norm": 0.17094235122203827, "learning_rate": 1e-06, "loss": -0.0549, "step": 959 }, { "clip_ratio/high_max": 0.001590189845956047, "clip_ratio/high_mean": 0.0005918488323004567, "clip_ratio/low_mean": 0.00038744301309634466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000979291837211349, "epoch": 9.223906705539358, "grad_norm": 0.17307482659816742, "learning_rate": 1e-06, "loss": -0.0414, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0867745535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 859.9336547851562, "completions/mean_terminated_length": 552.4429931640625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.2332361516035, "grad_norm": 0.21398615837097168, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 554470628.0, "reward": 0.6640625, "reward_std": 0.1381167769432068, "rewards/simpleverify_reward/mean": 0.6640625, "rewards/simpleverify_reward/std": 0.472383052110672, "step": 961 }, { "clip_ratio/high_max": 0.001970589812117396, "clip_ratio/high_mean": 0.0007021093088042107, "clip_ratio/low_mean": 0.000341609969837009, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010437193013785873, "epoch": 9.242565597667639, "grad_norm": 0.25221937894821167, "learning_rate": 1e-06, "loss": -0.0419, "step": 962 }, { "clip_ratio/high_max": 0.0020114830549573526, "clip_ratio/high_mean": 0.0007795877772878157, "clip_ratio/low_mean": 0.0005064794786449056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012860672759416047, "epoch": 9.251895043731778, "grad_norm": 0.27175372838974, "learning_rate": 1e-06, "loss": -0.0284, "step": 963 }, { "clip_ratio/high_max": 0.001862900495325448, "clip_ratio/high_mean": 0.0007561722704849672, "clip_ratio/low_mean": 0.0004939209957228741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001250093260750873, "epoch": 9.261224489795918, "grad_norm": 0.202055886387825, "learning_rate": 1e-06, "loss": -0.0265, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3933.0, "completions/mean_length": 860.0488891601562, "completions/mean_terminated_length": 568.7344970703125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.270553935860057, "grad_norm": 0.23672665655612946, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 556701891.0, "reward": 0.6635044813156128, "reward_std": 0.14525863528251648, "rewards/simpleverify_reward/mean": 0.6635044813156128, "rewards/simpleverify_reward/std": 0.4725765585899353, "step": 965 }, { "clip_ratio/high_max": 0.001895419103675522, "clip_ratio/high_mean": 0.00066228686409886, "clip_ratio/low_mean": 0.00038792464692960493, "clip_ratio/low_min": 2.6299179808120243e-05, "clip_ratio/region_mean": 0.0010502115219424013, "epoch": 9.279883381924199, "grad_norm": 0.23101414740085602, "learning_rate": 1e-06, "loss": -0.0494, "step": 966 }, { "clip_ratio/high_max": 0.0016612650215392932, "clip_ratio/high_mean": 0.0006814213857069262, "clip_ratio/low_mean": 0.0005062744703536737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011876958542416105, "epoch": 9.289212827988338, "grad_norm": 0.18930062651634216, "learning_rate": 1e-06, "loss": -0.0221, "step": 967 }, { "clip_ratio/high_max": 0.00200358984147897, "clip_ratio/high_mean": 0.0007551892613264499, "clip_ratio/low_mean": 0.00041611441565692076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011713036874425597, "epoch": 9.298542274052478, "grad_norm": 0.3470248579978943, "learning_rate": 1e-06, "loss": -0.0736, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.068359375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 771.851318359375, "completions/mean_terminated_length": 527.9410400390625, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 9.307871720116617, "grad_norm": 0.23096701502799988, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 558813542.0, "reward": 0.697823703289032, "reward_std": 0.13914257287979126, "rewards/simpleverify_reward/mean": 0.6978236436843872, "rewards/simpleverify_reward/std": 0.4592653512954712, "step": 969 }, { "clip_ratio/high_max": 0.00200111115918844, "clip_ratio/high_mean": 0.0006639491339228698, "clip_ratio/low_mean": 0.0004349433993411367, "clip_ratio/low_min": 1.2852148756792303e-05, "clip_ratio/region_mean": 0.0010988925369019853, "epoch": 9.317201166180759, "grad_norm": 0.3378995656967163, "learning_rate": 1e-06, "loss": -0.0147, "step": 970 }, { "clip_ratio/high_max": 0.001881080028397264, "clip_ratio/high_mean": 0.0007100116872607032, "clip_ratio/low_mean": 0.00043074902714579366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011407607271394227, "epoch": 9.326530612244898, "grad_norm": 0.24149802327156067, "learning_rate": 1e-06, "loss": -0.0172, "step": 971 }, { "clip_ratio/high_max": 0.0021498598798643798, "clip_ratio/high_mean": 0.0007118443754734471, "clip_ratio/low_mean": 0.00039386710386679624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011057114897994325, "epoch": 9.335860058309038, "grad_norm": 0.24914467334747314, "learning_rate": 1e-06, "loss": -0.012, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0973772321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 873.7559204101562, "completions/mean_terminated_length": 526.1319580078125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 9.345189504373177, "grad_norm": 0.22965632379055023, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 560874539.0, "reward": 0.6732701063156128, "reward_std": 0.13126018643379211, "rewards/simpleverify_reward/mean": 0.6732701063156128, "rewards/simpleverify_reward/std": 0.4690830111503601, "step": 973 }, { "clip_ratio/high_max": 0.0018267904633830767, "clip_ratio/high_mean": 0.0007022654244792648, "clip_ratio/low_mean": 0.00038869471336511197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010909601405728608, "epoch": 9.354518950437317, "grad_norm": 0.2182028740644455, "learning_rate": 1e-06, "loss": -0.0312, "step": 974 }, { "clip_ratio/high_max": 0.001855018535934505, "clip_ratio/high_mean": 0.0006637201804551296, "clip_ratio/low_mean": 0.00039076264192772214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010544828164711362, "epoch": 9.363848396501458, "grad_norm": 0.21642588078975677, "learning_rate": 1e-06, "loss": -0.0371, "step": 975 }, { "clip_ratio/high_max": 0.0021966430722386576, "clip_ratio/high_mean": 0.0008434743431280367, "clip_ratio/low_mean": 0.00048517504046685644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013286493849591352, "epoch": 9.373177842565598, "grad_norm": 0.2734016180038452, "learning_rate": 1e-06, "loss": -0.0641, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1046316964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3716.0, "completions/mean_length": 919.5508422851562, "completions/mean_terminated_length": 548.3546142578125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.382507288629737, "grad_norm": 0.24064119160175323, "learning_rate": 1e-06, "loss": -0.077, "num_tokens": 562987753.0, "reward": 0.66015625, "reward_std": 0.13917851448059082, "rewards/simpleverify_reward/mean": 0.66015625, "rewards/simpleverify_reward/std": 0.47372207045555115, "step": 977 }, { "clip_ratio/high_max": 0.001972307247342542, "clip_ratio/high_mean": 0.0006897103176015662, "clip_ratio/low_mean": 0.0005320585005392786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012217688199598342, "epoch": 9.391836734693877, "grad_norm": 0.24734772741794586, "learning_rate": 1e-06, "loss": -0.0122, "step": 978 }, { "clip_ratio/high_max": 0.0017007102469506208, "clip_ratio/high_mean": 0.0005556209553105873, "clip_ratio/low_mean": 0.0004300247833270987, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009856457436399069, "epoch": 9.401166180758018, "grad_norm": 0.2656126022338867, "learning_rate": 1e-06, "loss": -0.0161, "step": 979 }, { "clip_ratio/high_max": 0.002048137001111172, "clip_ratio/high_mean": 0.0007699103789491346, "clip_ratio/low_mean": 0.00032082698362501105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001090737346203241, "epoch": 9.410495626822158, "grad_norm": 8.72842025756836, "learning_rate": 1e-06, "loss": -0.064, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0984933035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3496.0, "completions/mean_length": 892.2637329101562, "completions/mean_terminated_length": 542.2423706054688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 9.419825072886297, "grad_norm": 0.21030248701572418, "learning_rate": 1e-06, "loss": -0.0519, "num_tokens": 565103922.0, "reward": 0.6702009439468384, "reward_std": 0.13675862550735474, "rewards/simpleverify_reward/mean": 0.6702008843421936, "rewards/simpleverify_reward/std": 0.4702056348323822, "step": 981 }, { "clip_ratio/high_max": 0.002039113598584663, "clip_ratio/high_mean": 0.0006513078415082418, "clip_ratio/low_mean": 0.0003571703609850374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010084781824843958, "epoch": 9.429154518950437, "grad_norm": 0.19152407348155975, "learning_rate": 1e-06, "loss": -0.0302, "step": 982 }, { "clip_ratio/high_max": 0.0021348351911001373, "clip_ratio/high_mean": 0.0007940393079479691, "clip_ratio/low_mean": 0.000351678171682579, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011457174623501487, "epoch": 9.438483965014576, "grad_norm": 0.34433427453041077, "learning_rate": 1e-06, "loss": -0.0656, "step": 983 }, { "clip_ratio/high_max": 0.001799090729036834, "clip_ratio/high_mean": 0.0006896796330693178, "clip_ratio/low_mean": 0.00041150252263832954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011011821552529, "epoch": 9.447813411078718, "grad_norm": 0.6332583427429199, "learning_rate": 1e-06, "loss": -0.0236, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0998883928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 925.8362426757812, "completions/mean_terminated_length": 574.0325317382812, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 9.457142857142857, "grad_norm": 0.21130548417568207, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 567325895.0, "reward": 0.6300223469734192, "reward_std": 0.12953948974609375, "rewards/simpleverify_reward/mean": 0.6300223469734192, "rewards/simpleverify_reward/std": 0.4828656315803528, "step": 985 }, { "clip_ratio/high_max": 0.001641837243369082, "clip_ratio/high_mean": 0.0005836808722960996, "clip_ratio/low_mean": 0.00043188714971620357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010155680356547236, "epoch": 9.466472303206997, "grad_norm": 0.21517911553382874, "learning_rate": 1e-06, "loss": -0.0398, "step": 986 }, { "clip_ratio/high_max": 0.0019028041970159393, "clip_ratio/high_mean": 0.0006999206489126664, "clip_ratio/low_mean": 0.00041799659356911434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011179172252013814, "epoch": 9.475801749271136, "grad_norm": 0.22022195160388947, "learning_rate": 1e-06, "loss": -0.026, "step": 987 }, { "clip_ratio/high_max": 0.0016515790339326486, "clip_ratio/high_mean": 0.0006188408897287445, "clip_ratio/low_mean": 0.0003565603906281467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000975401300820522, "epoch": 9.485131195335278, "grad_norm": 0.16527847945690155, "learning_rate": 1e-06, "loss": -0.0514, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 862.0313110351562, "completions/mean_terminated_length": 536.2506103515625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 9.494460641399417, "grad_norm": 0.19490519165992737, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 569408903.0, "reward": 0.7084263563156128, "reward_std": 0.13885828852653503, "rewards/simpleverify_reward/mean": 0.7084263563156128, "rewards/simpleverify_reward/std": 0.45455044507980347, "step": 989 }, { "clip_ratio/high_max": 0.0022060112532926723, "clip_ratio/high_mean": 0.0007728417240286944, "clip_ratio/low_mean": 0.00030079428279350395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010736360345617868, "epoch": 9.503790087463557, "grad_norm": 0.2040756195783615, "learning_rate": 1e-06, "loss": -0.0469, "step": 990 }, { "clip_ratio/high_max": 0.002213452800788218, "clip_ratio/high_mean": 0.0007419771595778002, "clip_ratio/low_mean": 0.00038483896969410125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001126816150645027, "epoch": 9.513119533527696, "grad_norm": 0.19896230101585388, "learning_rate": 1e-06, "loss": -0.054, "step": 991 }, { "clip_ratio/high_max": 0.0018503311512176879, "clip_ratio/high_mean": 0.000758176649469533, "clip_ratio/low_mean": 0.00048494439261048683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012431210361683043, "epoch": 9.522448979591836, "grad_norm": 0.20803387463092804, "learning_rate": 1e-06, "loss": -0.0319, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3970.0, "completions/mean_length": 966.0103759765625, "completions/mean_terminated_length": 568.3638305664062, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 9.531778425655977, "grad_norm": 0.21334201097488403, "learning_rate": 1e-06, "loss": -0.0233, "num_tokens": 571578236.0, "reward": 0.6395089626312256, "reward_std": 0.14549872279167175, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.48020994663238525, "step": 993 }, { "clip_ratio/high_max": 0.0021396190277300775, "clip_ratio/high_mean": 0.0008311366345878923, "clip_ratio/low_mean": 0.0003434861705500225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001174622815597104, "epoch": 9.541107871720117, "grad_norm": 0.2388453334569931, "learning_rate": 1e-06, "loss": -0.0801, "step": 994 }, { "clip_ratio/high_max": 0.002030127987382002, "clip_ratio/high_mean": 0.0007654643286514329, "clip_ratio/low_mean": 0.00042331733584433096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011887816599482903, "epoch": 9.550437317784256, "grad_norm": 0.5545074343681335, "learning_rate": 1e-06, "loss": -0.058, "step": 995 }, { "clip_ratio/high_max": 0.0018403357207716908, "clip_ratio/high_mean": 0.0006726698356942507, "clip_ratio/low_mean": 0.0005866788687853841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012593487408594228, "epoch": 9.559766763848396, "grad_norm": 0.23592303693294525, "learning_rate": 1e-06, "loss": -0.0155, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0895647321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3462.0, "completions/mean_length": 852.4378051757812, "completions/mean_terminated_length": 533.3499755859375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 9.569096209912537, "grad_norm": 0.21926428377628326, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 573684293.0, "reward": 0.6944754719734192, "reward_std": 0.11640949547290802, "rewards/simpleverify_reward/mean": 0.6944754719734192, "rewards/simpleverify_reward/std": 0.46069350838661194, "step": 997 }, { "clip_ratio/high_max": 0.0020619456845452078, "clip_ratio/high_mean": 0.0007171394900069572, "clip_ratio/low_mean": 0.00025480737940597464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009719468653202057, "epoch": 9.578425655976677, "grad_norm": 0.23006583750247955, "learning_rate": 1e-06, "loss": -0.0656, "step": 998 }, { "clip_ratio/high_max": 0.0013113612549204845, "clip_ratio/high_mean": 0.000518829510838259, "clip_ratio/low_mean": 0.000345721604389837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008645511115901172, "epoch": 9.587755102040816, "grad_norm": 0.18908536434173584, "learning_rate": 1e-06, "loss": -0.0552, "step": 999 }, { "clip_ratio/high_max": 0.0017949045723071322, "clip_ratio/high_mean": 0.0006325813192233909, "clip_ratio/low_mean": 0.0004635433379007736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001096124658943154, "epoch": 9.597084548104956, "grad_norm": 0.20562255382537842, "learning_rate": 1e-06, "loss": -0.0191, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1007254464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3557.0, "completions/mean_length": 898.4249877929688, "completions/mean_terminated_length": 540.2727661132812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.606413994169095, "grad_norm": 0.23870669305324554, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 575789808.0, "reward": 0.670479953289032, "reward_std": 0.12050455808639526, "rewards/simpleverify_reward/mean": 0.6704798936843872, "rewards/simpleverify_reward/std": 0.47010454535484314, "step": 1001 }, { "clip_ratio/high_max": 0.0017722779120958876, "clip_ratio/high_mean": 0.0006982288291510486, "clip_ratio/low_mean": 0.00028723024888677173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009854590571194421, "epoch": 9.615743440233237, "grad_norm": 0.18896161019802094, "learning_rate": 1e-06, "loss": -0.0507, "step": 1002 }, { "clip_ratio/high_max": 0.002017849088588264, "clip_ratio/high_mean": 0.0006891981192893581, "clip_ratio/low_mean": 0.00047712675495859, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011663248587865382, "epoch": 9.625072886297376, "grad_norm": 0.2220773994922638, "learning_rate": 1e-06, "loss": -0.017, "step": 1003 }, { "clip_ratio/high_max": 0.001976355037186295, "clip_ratio/high_mean": 0.0007283751911018044, "clip_ratio/low_mean": 0.0003322144407320593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001060589638655074, "epoch": 9.634402332361516, "grad_norm": 0.31787022948265076, "learning_rate": 1e-06, "loss": -0.0756, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 914.384521484375, "completions/mean_terminated_length": 539.259521484375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.643731778425655, "grad_norm": 0.20841747522354126, "learning_rate": 1e-06, "loss": -0.009, "num_tokens": 577871594.0, "reward": 0.6671317219734192, "reward_std": 0.12261678278446198, "rewards/simpleverify_reward/mean": 0.6671317219734192, "rewards/simpleverify_reward/std": 0.47130560874938965, "step": 1005 }, { "clip_ratio/high_max": 0.001672542086453177, "clip_ratio/high_mean": 0.0006284160403993155, "clip_ratio/low_mean": 0.0003415890014366596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009700050341052702, "epoch": 9.653061224489797, "grad_norm": 0.22904010117053986, "learning_rate": 1e-06, "loss": -0.0825, "step": 1006 }, { "clip_ratio/high_max": 0.0015915698568278458, "clip_ratio/high_mean": 0.0005688834198736004, "clip_ratio/low_mean": 0.00033365243211846973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009025358631333802, "epoch": 9.662390670553936, "grad_norm": 0.16028210520744324, "learning_rate": 1e-06, "loss": -0.0192, "step": 1007 }, { "clip_ratio/high_max": 0.0016781541180534987, "clip_ratio/high_mean": 0.0005676038822457485, "clip_ratio/low_mean": 0.00042002574070920673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009876296353468206, "epoch": 9.671720116618076, "grad_norm": 0.26709187030792236, "learning_rate": 1e-06, "loss": -0.0501, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3792.0, "completions/mean_length": 895.7598266601562, "completions/mean_terminated_length": 555.981201171875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 9.681049562682215, "grad_norm": 0.20502828061580658, "learning_rate": 1e-06, "loss": -0.0291, "num_tokens": 580014213.0, "reward": 0.6679688096046448, "reward_std": 0.14232595264911652, "rewards/simpleverify_reward/mean": 0.66796875, "rewards/simpleverify_reward/std": 0.4710078537464142, "step": 1009 }, { "clip_ratio/high_max": 0.001902952259115409, "clip_ratio/high_mean": 0.0007361516982200556, "clip_ratio/low_mean": 0.00039890619245852577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001135057882493129, "epoch": 9.690379008746355, "grad_norm": 0.22624221444129944, "learning_rate": 1e-06, "loss": -0.0443, "step": 1010 }, { "clip_ratio/high_max": 0.0016896459674171638, "clip_ratio/high_mean": 0.0006355045470627374, "clip_ratio/low_mean": 0.0004125285586269456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010480330929567572, "epoch": 9.699708454810496, "grad_norm": 0.21253754198551178, "learning_rate": 1e-06, "loss": -0.0756, "step": 1011 }, { "clip_ratio/high_max": 0.002014700170548167, "clip_ratio/high_mean": 0.0007628978692082455, "clip_ratio/low_mean": 0.0005405655792856123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013034634866926353, "epoch": 9.709037900874636, "grad_norm": 0.21113614737987518, "learning_rate": 1e-06, "loss": -0.0522, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1032366071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 903.3214721679688, "completions/mean_terminated_length": 535.7760009765625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.718367346938775, "grad_norm": 0.20672674477100372, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 582081229.0, "reward": 0.678292453289032, "reward_std": 0.12834875285625458, "rewards/simpleverify_reward/mean": 0.6782923936843872, "rewards/simpleverify_reward/std": 0.46719664335250854, "step": 1013 }, { "clip_ratio/high_max": 0.0017428676583222114, "clip_ratio/high_mean": 0.0006932239957677666, "clip_ratio/low_mean": 0.0002860787733425241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000979302785708569, "epoch": 9.727696793002915, "grad_norm": 0.19711089134216309, "learning_rate": 1e-06, "loss": -0.0681, "step": 1014 }, { "clip_ratio/high_max": 0.0019185795536031947, "clip_ratio/high_mean": 0.0006803327851230279, "clip_ratio/low_mean": 0.0002907016319113609, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009710344256745884, "epoch": 9.737026239067056, "grad_norm": 0.2736588716506958, "learning_rate": 1e-06, "loss": -0.05, "step": 1015 }, { "clip_ratio/high_max": 0.0019863925517711323, "clip_ratio/high_mean": 0.0006916539286976331, "clip_ratio/low_mean": 0.0003273540535246866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010190079829044407, "epoch": 9.746355685131196, "grad_norm": 0.2880162298679352, "learning_rate": 1e-06, "loss": -0.0041, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0998883928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 924.5296020507812, "completions/mean_terminated_length": 572.5809326171875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 9.755685131195335, "grad_norm": 0.2028096616268158, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 584280775.0, "reward": 0.6752232313156128, "reward_std": 0.14170511066913605, "rewards/simpleverify_reward/mean": 0.6752232313156128, "rewards/simpleverify_reward/std": 0.468356728553772, "step": 1017 }, { "clip_ratio/high_max": 0.0015010207316663582, "clip_ratio/high_mean": 0.0005891229684493737, "clip_ratio/low_mean": 0.00043540896331251133, "clip_ratio/low_min": 1.0174181625188794e-05, "clip_ratio/region_mean": 0.0010245319390378427, "epoch": 9.765014577259475, "grad_norm": 0.24759797751903534, "learning_rate": 1e-06, "loss": -0.0235, "step": 1018 }, { "clip_ratio/high_max": 0.0017481584291090257, "clip_ratio/high_mean": 0.0006667748384643346, "clip_ratio/low_mean": 0.00043755503020292963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011043298727599904, "epoch": 9.774344023323614, "grad_norm": 0.2618403434753418, "learning_rate": 1e-06, "loss": -0.0564, "step": 1019 }, { "clip_ratio/high_max": 0.0021284245085553266, "clip_ratio/high_mean": 0.0008048811523622135, "clip_ratio/low_mean": 0.0004263320479367394, "clip_ratio/low_min": 1.7361111531499773e-05, "clip_ratio/region_mean": 0.0012312131948419847, "epoch": 9.783673469387756, "grad_norm": 0.20214572548866272, "learning_rate": 1e-06, "loss": -0.0261, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1032366071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 928.275146484375, "completions/mean_terminated_length": 563.6023559570312, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 9.793002915451895, "grad_norm": 0.2495395392179489, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 586441865.0, "reward": 0.6785714626312256, "reward_std": 0.15079587697982788, "rewards/simpleverify_reward/mean": 0.6785714030265808, "rewards/simpleverify_reward/std": 0.46709004044532776, "step": 1021 }, { "clip_ratio/high_max": 0.001956416672328487, "clip_ratio/high_mean": 0.0006819209170316753, "clip_ratio/low_mean": 0.00040039757686827215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001082318489352474, "epoch": 9.802332361516035, "grad_norm": 0.1884034425020218, "learning_rate": 1e-06, "loss": -0.0803, "step": 1022 }, { "clip_ratio/high_max": 0.0018323895783396438, "clip_ratio/high_mean": 0.0007151539302867604, "clip_ratio/low_mean": 0.0005210879812693747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001236241889273515, "epoch": 9.811661807580174, "grad_norm": 0.2511901259422302, "learning_rate": 1e-06, "loss": -0.0299, "step": 1023 }, { "clip_ratio/high_max": 0.0018315360102860723, "clip_ratio/high_mean": 0.0007510808900406118, "clip_ratio/low_mean": 0.0003707823534568888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001121863278967794, "epoch": 9.820991253644316, "grad_norm": 0.21524089574813843, "learning_rate": 1e-06, "loss": -0.0587, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1188616071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 977.433349609375, "completions/mean_terminated_length": 556.752685546875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 9.830320699708455, "grad_norm": 0.21767926216125488, "learning_rate": 1e-06, "loss": -0.0919, "num_tokens": 588569386.0, "reward": 0.6383928656578064, "reward_std": 0.13345801830291748, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4805328845977783, "step": 1025 }, { "clip_ratio/high_max": 0.0017355596864945255, "clip_ratio/high_mean": 0.0006142329875729047, "clip_ratio/low_mean": 0.0003462283025328361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009604612805560464, "epoch": 9.839650145772595, "grad_norm": 0.299602746963501, "learning_rate": 1e-06, "loss": -0.0641, "step": 1026 }, { "clip_ratio/high_max": 0.0017424189391022082, "clip_ratio/high_mean": 0.0006452282459576963, "clip_ratio/low_mean": 0.0003424710114359186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009876992808131035, "epoch": 9.848979591836734, "grad_norm": 0.24865947663784027, "learning_rate": 1e-06, "loss": -0.0426, "step": 1027 }, { "clip_ratio/high_max": 0.001589829131262377, "clip_ratio/high_mean": 0.0006317471707006916, "clip_ratio/low_mean": 0.0003868186536237772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010185658247792162, "epoch": 9.858309037900874, "grad_norm": 0.18604271113872528, "learning_rate": 1e-06, "loss": -0.0343, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1252790178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 1000.0572509765625, "completions/mean_terminated_length": 556.6510009765625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.867638483965015, "grad_norm": 0.24027350544929504, "learning_rate": 1e-06, "loss": -0.0571, "num_tokens": 590676159.0, "reward": 0.6342076063156128, "reward_std": 0.15732692182064056, "rewards/simpleverify_reward/mean": 0.6342076063156128, "rewards/simpleverify_reward/std": 0.48171886801719666, "step": 1029 }, { "clip_ratio/high_max": 0.0017720584401104134, "clip_ratio/high_mean": 0.0007292578491160384, "clip_ratio/low_mean": 0.0004649925631383667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011942504243052099, "epoch": 9.876967930029155, "grad_norm": 0.2116895318031311, "learning_rate": 1e-06, "loss": -0.0396, "step": 1030 }, { "clip_ratio/high_max": 0.0019051886119996198, "clip_ratio/high_mean": 0.000840656412037788, "clip_ratio/low_mean": 0.0005348382144347852, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001375494604872074, "epoch": 9.886297376093294, "grad_norm": 0.2946925461292267, "learning_rate": 1e-06, "loss": -0.045, "step": 1031 }, { "clip_ratio/high_max": 0.001989103195228381, "clip_ratio/high_mean": 0.0008200674665204133, "clip_ratio/low_mean": 0.0005460035063151736, "clip_ratio/low_min": 1.578282899572514e-05, "clip_ratio/region_mean": 0.0013660709810210392, "epoch": 9.895626822157434, "grad_norm": 0.3186889588832855, "learning_rate": 1e-06, "loss": -0.0827, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0940290178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3296.0, "completions/mean_length": 848.8108520507812, "completions/mean_terminated_length": 511.7911682128906, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 9.904956268221575, "grad_norm": 0.21444948017597198, "learning_rate": 1e-06, "loss": -0.0401, "num_tokens": 592689209.0, "reward": 0.6905692219734192, "reward_std": 0.1064693033695221, "rewards/simpleverify_reward/mean": 0.6905692219734192, "rewards/simpleverify_reward/std": 0.46232348680496216, "step": 1033 }, { "clip_ratio/high_max": 0.001582995642820606, "clip_ratio/high_mean": 0.0005204223280088627, "clip_ratio/low_mean": 0.00038761763971706387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009080399504455272, "epoch": 9.914285714285715, "grad_norm": 0.20122960209846497, "learning_rate": 1e-06, "loss": -0.0469, "step": 1034 }, { "clip_ratio/high_max": 0.0015957939613144845, "clip_ratio/high_mean": 0.000529778686541249, "clip_ratio/low_mean": 0.00037176025807639235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009015389405249152, "epoch": 9.923615160349854, "grad_norm": 0.19715788960456848, "learning_rate": 1e-06, "loss": -0.019, "step": 1035 }, { "clip_ratio/high_max": 0.001503815787145868, "clip_ratio/high_mean": 0.0004895871579719824, "clip_ratio/low_mean": 0.0003957119474762294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008852991013554856, "epoch": 9.932944606413994, "grad_norm": 0.2120196372270584, "learning_rate": 1e-06, "loss": -0.0397, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 937.2179565429688, "completions/mean_terminated_length": 566.9866333007812, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 9.942274052478133, "grad_norm": 0.2229129821062088, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 594863806.0, "reward": 0.6688058376312256, "reward_std": 0.13752393424510956, "rewards/simpleverify_reward/mean": 0.6688057780265808, "rewards/simpleverify_reward/std": 0.4707084000110626, "step": 1037 }, { "clip_ratio/high_max": 0.0015835034610063303, "clip_ratio/high_mean": 0.0006406751726899529, "clip_ratio/low_mean": 0.00037521229364756437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010158874720218591, "epoch": 9.951603498542275, "grad_norm": 0.20900478959083557, "learning_rate": 1e-06, "loss": 0.0053, "step": 1038 }, { "clip_ratio/high_max": 0.0016739363709348254, "clip_ratio/high_mean": 0.0006129913363110973, "clip_ratio/low_mean": 0.0003309459630145284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009439373134227935, "epoch": 9.960932944606414, "grad_norm": 0.21144139766693115, "learning_rate": 1e-06, "loss": -0.0584, "step": 1039 }, { "clip_ratio/high_max": 0.0017623825988266617, "clip_ratio/high_mean": 0.0007038916392048122, "clip_ratio/low_mean": 0.0003262960785832547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010301877300662454, "epoch": 9.970262390670554, "grad_norm": 0.1827440708875656, "learning_rate": 1e-06, "loss": -0.0928, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1177455357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 965.613037109375, "completions/mean_terminated_length": 547.83203125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 10.00932944606414, "grad_norm": 0.2063363939523697, "learning_rate": 1e-06, "loss": -0.077, "num_tokens": 596958843.0, "reward": 0.6400669813156128, "reward_std": 0.1399751901626587, "rewards/simpleverify_reward/mean": 0.6400669813156128, "rewards/simpleverify_reward/std": 0.48004743456840515, "step": 1041 }, { "clip_ratio/high_max": 0.001575538521137787, "clip_ratio/high_mean": 0.0006675898066532682, "clip_ratio/low_mean": 0.00032275710873364005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009903469235723605, "epoch": 10.018658892128279, "grad_norm": 0.20350565016269684, "learning_rate": 1e-06, "loss": -0.0728, "step": 1042 }, { "clip_ratio/high_max": 0.002151468328520423, "clip_ratio/high_mean": 0.0006695262618450215, "clip_ratio/low_mean": 0.0003548467135487954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010243729848298244, "epoch": 10.02798833819242, "grad_norm": 0.190528005361557, "learning_rate": 1e-06, "loss": -0.0224, "step": 1043 }, { "clip_ratio/high_max": 0.0017263609988731332, "clip_ratio/high_mean": 0.000649884467748052, "clip_ratio/low_mean": 0.0005582304738709354, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012081149234290933, "epoch": 10.03731778425656, "grad_norm": 0.2305830717086792, "learning_rate": 1e-06, "loss": -0.0121, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.095703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3723.0, "completions/mean_length": 902.37060546875, "completions/mean_terminated_length": 564.3837890625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 10.0466472303207, "grad_norm": 0.19696617126464844, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 599139843.0, "reward": 0.6727120876312256, "reward_std": 0.14179861545562744, "rewards/simpleverify_reward/mean": 0.6727120280265808, "rewards/simpleverify_reward/std": 0.46928879618644714, "step": 1045 }, { "clip_ratio/high_max": 0.0015228074553306215, "clip_ratio/high_mean": 0.0006177956211104174, "clip_ratio/low_mean": 0.0004650406290238607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001082836250134278, "epoch": 10.055976676384839, "grad_norm": 29.420034408569336, "learning_rate": 1e-06, "loss": -0.0107, "step": 1046 }, { "clip_ratio/high_max": 0.0018838858813978732, "clip_ratio/high_mean": 0.0007009841483522905, "clip_ratio/low_mean": 0.0004164821775702876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011174663159181364, "epoch": 10.06530612244898, "grad_norm": 0.15963737666606903, "learning_rate": 1e-06, "loss": -0.0673, "step": 1047 }, { "clip_ratio/high_max": 0.0016107785850181244, "clip_ratio/high_mean": 0.0005997960761305876, "clip_ratio/low_mean": 0.0004931828225380741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010929788841167465, "epoch": 10.07463556851312, "grad_norm": 0.24775461852550507, "learning_rate": 1e-06, "loss": -0.0341, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 910.501708984375, "completions/mean_terminated_length": 559.18896484375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 10.08396501457726, "grad_norm": 0.2328295260667801, "learning_rate": 1e-06, "loss": -0.0608, "num_tokens": 601337497.0, "reward": 0.6559709906578064, "reward_std": 0.14151887595653534, "rewards/simpleverify_reward/mean": 0.6559709906578064, "rewards/simpleverify_reward/std": 0.47511687874794006, "step": 1049 }, { "clip_ratio/high_max": 0.0016304531309287995, "clip_ratio/high_mean": 0.0007213209360088513, "clip_ratio/low_mean": 0.0004183759874649695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001139696905738674, "epoch": 10.093294460641399, "grad_norm": 0.19206124544143677, "learning_rate": 1e-06, "loss": -0.0432, "step": 1050 }, { "clip_ratio/high_max": 0.0018635236374393571, "clip_ratio/high_mean": 0.0007566447911813157, "clip_ratio/low_mean": 0.00044111572969995905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001197760526338243, "epoch": 10.102623906705539, "grad_norm": 0.20569536089897156, "learning_rate": 1e-06, "loss": -0.0792, "step": 1051 }, { "clip_ratio/high_max": 0.0019312429940328002, "clip_ratio/high_mean": 0.0007279176679730881, "clip_ratio/low_mean": 0.0006054905388737097, "clip_ratio/low_min": 2.3629489078302868e-05, "clip_ratio/region_mean": 0.001333408221398713, "epoch": 10.11195335276968, "grad_norm": 2.0385513305664062, "learning_rate": 1e-06, "loss": -0.0133, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.095703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 888.9930419921875, "completions/mean_terminated_length": 549.5905151367188, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 10.12128279883382, "grad_norm": 0.2090703696012497, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 603480376.0, "reward": 0.6710379719734192, "reward_std": 0.13404197990894318, "rewards/simpleverify_reward/mean": 0.6710379719734192, "rewards/simpleverify_reward/std": 0.4699017107486725, "step": 1053 }, { "clip_ratio/high_max": 0.0019801697999355383, "clip_ratio/high_mean": 0.0006774560861231294, "clip_ratio/low_mean": 0.0004051818787047523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00108263795482344, "epoch": 10.130612244897959, "grad_norm": 0.3119916617870331, "learning_rate": 1e-06, "loss": -0.0135, "step": 1054 }, { "clip_ratio/high_max": 0.0018764674568956252, "clip_ratio/high_mean": 0.0006153261701911106, "clip_ratio/low_mean": 0.0003918311920187989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010071573833556613, "epoch": 10.139941690962099, "grad_norm": 0.20676939189434052, "learning_rate": 1e-06, "loss": -0.0424, "step": 1055 }, { "clip_ratio/high_max": 0.001666960379225202, "clip_ratio/high_mean": 0.000660544031234167, "clip_ratio/low_mean": 0.0003667214778033667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010272655090375338, "epoch": 10.14927113702624, "grad_norm": 0.20835131406784058, "learning_rate": 1e-06, "loss": -0.0558, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 960.66552734375, "completions/mean_terminated_length": 544.4706420898438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.15860058309038, "grad_norm": 0.22161857783794403, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 605560177.0, "reward": 0.684151828289032, "reward_std": 0.13229307532310486, "rewards/simpleverify_reward/mean": 0.6841517686843872, "rewards/simpleverify_reward/std": 0.46491768956184387, "step": 1057 }, { "clip_ratio/high_max": 0.0018891063300543465, "clip_ratio/high_mean": 0.0006888865573273506, "clip_ratio/low_mean": 0.000465277435068856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011541639978531748, "epoch": 10.167930029154519, "grad_norm": 0.2026614397764206, "learning_rate": 1e-06, "loss": -0.0379, "step": 1058 }, { "clip_ratio/high_max": 0.002178335555072408, "clip_ratio/high_mean": 0.0008184495900422917, "clip_ratio/low_mean": 0.00039600482523383107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012144544161856174, "epoch": 10.177259475218658, "grad_norm": 0.280152827501297, "learning_rate": 1e-06, "loss": -0.0352, "step": 1059 }, { "clip_ratio/high_max": 0.0018862835713662207, "clip_ratio/high_mean": 0.0007185249123722315, "clip_ratio/low_mean": 0.00046821534078844707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011867402681673411, "epoch": 10.186588921282798, "grad_norm": 0.2382468730211258, "learning_rate": 1e-06, "loss": -0.1012, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1130022321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 956.2589721679688, "completions/mean_terminated_length": 556.2604370117188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.19591836734694, "grad_norm": 0.2238246202468872, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 607684353.0, "reward": 0.6428571939468384, "reward_std": 0.14097049832344055, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4792242646217346, "step": 1061 }, { "clip_ratio/high_max": 0.001489348982431693, "clip_ratio/high_mean": 0.0005565742394537665, "clip_ratio/low_mean": 0.000475219142572314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010317933993064798, "epoch": 10.205247813411079, "grad_norm": 0.27846816182136536, "learning_rate": 1e-06, "loss": -0.0365, "step": 1062 }, { "clip_ratio/high_max": 0.0019374503172002733, "clip_ratio/high_mean": 0.0006432631766983832, "clip_ratio/low_mean": 0.0007110196538633318, "clip_ratio/low_min": 3.357206878717989e-05, "clip_ratio/region_mean": 0.0013542828310164623, "epoch": 10.214577259475218, "grad_norm": 0.24660052359104156, "learning_rate": 1e-06, "loss": -0.0336, "step": 1063 }, { "clip_ratio/high_max": 0.00209760033612838, "clip_ratio/high_mean": 0.0006851688394817756, "clip_ratio/low_mean": 0.00048496710860490566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011701359708240489, "epoch": 10.223906705539358, "grad_norm": 0.8047247529029846, "learning_rate": 1e-06, "loss": -0.0429, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1118861607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 958.3836669921875, "completions/mean_terminated_length": 563.1011352539062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 10.2332361516035, "grad_norm": 0.23671136796474457, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 609824088.0, "reward": 0.666015625, "reward_std": 0.14544223248958588, "rewards/simpleverify_reward/mean": 0.666015625, "rewards/simpleverify_reward/std": 0.4717000126838684, "step": 1065 }, { "clip_ratio/high_max": 0.0014468788431258872, "clip_ratio/high_mean": 0.000580408609494043, "clip_ratio/low_mean": 0.0004980999565304955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010785085796669591, "epoch": 10.242565597667639, "grad_norm": 0.6944945454597473, "learning_rate": 1e-06, "loss": -0.022, "step": 1066 }, { "clip_ratio/high_max": 0.0016401046814280562, "clip_ratio/high_mean": 0.0006663356343779014, "clip_ratio/low_mean": 0.00047736391661601374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001143699530075537, "epoch": 10.251895043731778, "grad_norm": 0.26724451780319214, "learning_rate": 1e-06, "loss": -0.055, "step": 1067 }, { "clip_ratio/high_max": 0.0022050139559723902, "clip_ratio/high_mean": 0.000773677597862843, "clip_ratio/low_mean": 0.0004727539026134764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012464314786484465, "epoch": 10.261224489795918, "grad_norm": 0.24772419035434723, "learning_rate": 1e-06, "loss": -0.0386, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 915.61865234375, "completions/mean_terminated_length": 540.6390991210938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.270553935860057, "grad_norm": 0.21589867770671844, "learning_rate": 1e-06, "loss": -0.0448, "num_tokens": 611934737.0, "reward": 0.6526228189468384, "reward_std": 0.12522843480110168, "rewards/simpleverify_reward/mean": 0.6526227593421936, "rewards/simpleverify_reward/std": 0.4762032926082611, "step": 1069 }, { "clip_ratio/high_max": 0.0015955822782416362, "clip_ratio/high_mean": 0.0006416838759832899, "clip_ratio/low_mean": 0.0004922575776618032, "clip_ratio/low_min": 2.8623770049307495e-05, "clip_ratio/region_mean": 0.0011339414122630842, "epoch": 10.279883381924199, "grad_norm": 0.2788504362106323, "learning_rate": 1e-06, "loss": -0.0504, "step": 1070 }, { "clip_ratio/high_max": 0.0018292315107828472, "clip_ratio/high_mean": 0.0007140792422433151, "clip_ratio/low_mean": 0.0005068053997092647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012208846455905586, "epoch": 10.289212827988338, "grad_norm": 0.20734438300132751, "learning_rate": 1e-06, "loss": -0.0329, "step": 1071 }, { "clip_ratio/high_max": 0.0015924478502711281, "clip_ratio/high_mean": 0.0005934360797255067, "clip_ratio/low_mean": 0.000381268611363339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009747046715347096, "epoch": 10.298542274052478, "grad_norm": 0.5740536451339722, "learning_rate": 1e-06, "loss": -0.0313, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3875.0, "completions/mean_length": 917.474365234375, "completions/mean_terminated_length": 553.762451171875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 10.307871720116617, "grad_norm": 0.2343902438879013, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 614062461.0, "reward": 0.6612723469734192, "reward_std": 0.13776062428951263, "rewards/simpleverify_reward/mean": 0.6612723469734192, "rewards/simpleverify_reward/std": 0.4733431935310364, "step": 1073 }, { "clip_ratio/high_max": 0.0017735839501256123, "clip_ratio/high_mean": 0.0006897526236571139, "clip_ratio/low_mean": 0.00037626573362103954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010660183434083592, "epoch": 10.317201166180759, "grad_norm": 0.17806874215602875, "learning_rate": 1e-06, "loss": -0.0786, "step": 1074 }, { "clip_ratio/high_max": 0.0019453849927231204, "clip_ratio/high_mean": 0.0007067843080221792, "clip_ratio/low_mean": 0.0005364550193007744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012432393014023546, "epoch": 10.326530612244898, "grad_norm": 0.2157558798789978, "learning_rate": 1e-06, "loss": -0.0241, "step": 1075 }, { "clip_ratio/high_max": 0.0017436561502108816, "clip_ratio/high_mean": 0.0006758922518201871, "clip_ratio/low_mean": 0.0004412304310790205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011171226869919337, "epoch": 10.335860058309038, "grad_norm": 0.1748340129852295, "learning_rate": 1e-06, "loss": -0.0574, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0951450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 857.6981201171875, "completions/mean_terminated_length": 517.1920776367188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 10.345189504373177, "grad_norm": 0.23391227424144745, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 616082699.0, "reward": 0.6735491156578064, "reward_std": 0.13601155579090118, "rewards/simpleverify_reward/mean": 0.6735491156578064, "rewards/simpleverify_reward/std": 0.4689798355102539, "step": 1077 }, { "clip_ratio/high_max": 0.0016489262598042842, "clip_ratio/high_mean": 0.0006085603135943529, "clip_ratio/low_mean": 0.00035777391985902796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009663342534622643, "epoch": 10.354518950437317, "grad_norm": 0.20055118203163147, "learning_rate": 1e-06, "loss": -0.0574, "step": 1078 }, { "clip_ratio/high_max": 0.0018639272420841735, "clip_ratio/high_mean": 0.0007029997841527802, "clip_ratio/low_mean": 0.0003557264581104391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010587262295302935, "epoch": 10.363848396501458, "grad_norm": 0.21989864110946655, "learning_rate": 1e-06, "loss": -0.0542, "step": 1079 }, { "clip_ratio/high_max": 0.0017526353749417467, "clip_ratio/high_mean": 0.000710411865838978, "clip_ratio/low_mean": 0.00045193254140940553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001162344400654547, "epoch": 10.373177842565598, "grad_norm": 0.5249748826026917, "learning_rate": 1e-06, "loss": -0.0335, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3519.0, "completions/mean_length": 960.8711547851562, "completions/mean_terminated_length": 531.183349609375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 10.382507288629737, "grad_norm": 0.24224554002285004, "learning_rate": 1e-06, "loss": -0.0499, "num_tokens": 618110381.0, "reward": 0.6799665689468384, "reward_std": 0.12171872705221176, "rewards/simpleverify_reward/mean": 0.6799665093421936, "rewards/simpleverify_reward/std": 0.4665541648864746, "step": 1081 }, { "clip_ratio/high_max": 0.0016327167722920422, "clip_ratio/high_mean": 0.0005922325699430075, "clip_ratio/low_mean": 0.00036380640131028485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009560389735270292, "epoch": 10.391836734693877, "grad_norm": 0.18721261620521545, "learning_rate": 1e-06, "loss": -0.035, "step": 1082 }, { "clip_ratio/high_max": 0.002511173632228747, "clip_ratio/high_mean": 0.0008783279881754424, "clip_ratio/low_mean": 0.00034660730034374865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012249352730577812, "epoch": 10.401166180758018, "grad_norm": 0.2692002058029175, "learning_rate": 1e-06, "loss": -0.1034, "step": 1083 }, { "clip_ratio/high_max": 0.0016880671682883985, "clip_ratio/high_mean": 0.0006254826566873817, "clip_ratio/low_mean": 0.000455683717973443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001081166350559215, "epoch": 10.410495626822158, "grad_norm": 0.2443450689315796, "learning_rate": 1e-06, "loss": -0.0263, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1007254464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 904.7492065429688, "completions/mean_terminated_length": 547.3052978515625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 10.419825072886297, "grad_norm": 0.21404823660850525, "learning_rate": 1e-06, "loss": -0.0741, "num_tokens": 620217058.0, "reward": 0.6721540689468384, "reward_std": 0.14051271975040436, "rewards/simpleverify_reward/mean": 0.6721540093421936, "rewards/simpleverify_reward/std": 0.4694938659667969, "step": 1085 }, { "clip_ratio/high_max": 0.0018483225503587164, "clip_ratio/high_mean": 0.0007090803064784268, "clip_ratio/low_mean": 0.0004185480929663754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011276284203631803, "epoch": 10.429154518950437, "grad_norm": 0.3079920709133148, "learning_rate": 1e-06, "loss": -0.0577, "step": 1086 }, { "clip_ratio/high_max": 0.0018847078317776322, "clip_ratio/high_mean": 0.0006691395483358065, "clip_ratio/low_mean": 0.0005246346854619333, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011937742383452132, "epoch": 10.438483965014576, "grad_norm": 0.39122503995895386, "learning_rate": 1e-06, "loss": -0.0644, "step": 1087 }, { "clip_ratio/high_max": 0.0018993280318682082, "clip_ratio/high_mean": 0.0007056453468976542, "clip_ratio/low_mean": 0.0005247889062047761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012304342599236406, "epoch": 10.447813411078718, "grad_norm": 0.25033101439476013, "learning_rate": 1e-06, "loss": 0.006, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1135602678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 947.18896484375, "completions/mean_terminated_length": 543.8001098632812, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 10.457142857142857, "grad_norm": 0.2428867220878601, "learning_rate": 1e-06, "loss": -0.0511, "num_tokens": 622300847.0, "reward": 0.6565290689468384, "reward_std": 0.13465997576713562, "rewards/simpleverify_reward/mean": 0.6565290093421936, "rewards/simpleverify_reward/std": 0.4749332666397095, "step": 1089 }, { "clip_ratio/high_max": 0.0018173754069721326, "clip_ratio/high_mean": 0.0006742630448570708, "clip_ratio/low_mean": 0.0003918997592791129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001066162840288598, "epoch": 10.466472303206997, "grad_norm": 0.20796075463294983, "learning_rate": 1e-06, "loss": -0.0407, "step": 1090 }, { "clip_ratio/high_max": 0.0018332462277612649, "clip_ratio/high_mean": 0.0007418448331009131, "clip_ratio/low_mean": 0.0003101204447375494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010519652860239148, "epoch": 10.475801749271136, "grad_norm": 0.18470880389213562, "learning_rate": 1e-06, "loss": -0.0676, "step": 1091 }, { "clip_ratio/high_max": 0.0020870785156148486, "clip_ratio/high_mean": 0.000772705951021635, "clip_ratio/low_mean": 0.0003472081955351314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011199141245015198, "epoch": 10.485131195335278, "grad_norm": 0.18690967559814453, "learning_rate": 1e-06, "loss": -0.0687, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1118861607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 921.5441284179688, "completions/mean_terminated_length": 521.6204833984375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.494460641399417, "grad_norm": 0.2556957006454468, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 624317173.0, "reward": 0.6718750596046448, "reward_std": 0.12252514064311981, "rewards/simpleverify_reward/mean": 0.671875, "rewards/simpleverify_reward/std": 0.46959611773490906, "step": 1093 }, { "clip_ratio/high_max": 0.0018831433517334517, "clip_ratio/high_mean": 0.0006619889227295062, "clip_ratio/low_mean": 0.00030795971110819664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009699486345198238, "epoch": 10.503790087463557, "grad_norm": 1.154140591621399, "learning_rate": 1e-06, "loss": -0.0663, "step": 1094 }, { "clip_ratio/high_max": 0.0017949679167941213, "clip_ratio/high_mean": 0.0007236329383886186, "clip_ratio/low_mean": 0.0002987818688779953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001022414782710257, "epoch": 10.513119533527696, "grad_norm": 0.16127803921699524, "learning_rate": 1e-06, "loss": -0.0607, "step": 1095 }, { "clip_ratio/high_max": 0.0013749625031778123, "clip_ratio/high_mean": 0.0004423226646395051, "clip_ratio/low_mean": 0.000375640352103801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008179630162885587, "epoch": 10.522448979591836, "grad_norm": 0.28966042399406433, "learning_rate": 1e-06, "loss": -0.0267, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 865.1668701171875, "completions/mean_terminated_length": 515.5095825195312, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.531778425655977, "grad_norm": 0.21840274333953857, "learning_rate": 1e-06, "loss": -0.0562, "num_tokens": 626353355.0, "reward": 0.6950334906578064, "reward_std": 0.12981335818767548, "rewards/simpleverify_reward/mean": 0.6950334906578064, "rewards/simpleverify_reward/std": 0.4604575037956238, "step": 1097 }, { "clip_ratio/high_max": 0.0016746792698540958, "clip_ratio/high_mean": 0.000626659616955294, "clip_ratio/low_mean": 0.000369583308838628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009962429267034167, "epoch": 10.541107871720117, "grad_norm": 0.21702076494693756, "learning_rate": 1e-06, "loss": -0.076, "step": 1098 }, { "clip_ratio/high_max": 0.0017955837938643526, "clip_ratio/high_mean": 0.0007279919846041594, "clip_ratio/low_mean": 0.00037092897082402487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001098920960430405, "epoch": 10.550437317784256, "grad_norm": 0.23994813859462738, "learning_rate": 1e-06, "loss": -0.0836, "step": 1099 }, { "clip_ratio/high_max": 0.0016689493495505303, "clip_ratio/high_mean": 0.0006518896316265455, "clip_ratio/low_mean": 0.0005384091589348827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011902987971552648, "epoch": 10.559766763848396, "grad_norm": 0.22060418128967285, "learning_rate": 1e-06, "loss": 0.0024, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 941.279052734375, "completions/mean_terminated_length": 558.2903442382812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 10.569096209912537, "grad_norm": 0.21173614263534546, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 628500715.0, "reward": 0.658761203289032, "reward_std": 0.13065952062606812, "rewards/simpleverify_reward/mean": 0.6587611436843872, "rewards/simpleverify_reward/std": 0.4741915464401245, "step": 1101 }, { "clip_ratio/high_max": 0.0017516264015284833, "clip_ratio/high_mean": 0.0006840990081400378, "clip_ratio/low_mean": 0.00047726943193993066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011613684073381592, "epoch": 10.578425655976677, "grad_norm": 0.49139463901519775, "learning_rate": 1e-06, "loss": -0.0475, "step": 1102 }, { "clip_ratio/high_max": 0.0016541470977244899, "clip_ratio/high_mean": 0.0006117263255873695, "clip_ratio/low_mean": 0.00042731165058285114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010390379648015369, "epoch": 10.587755102040816, "grad_norm": 0.21816465258598328, "learning_rate": 1e-06, "loss": -0.0191, "step": 1103 }, { "clip_ratio/high_max": 0.0017132332650362514, "clip_ratio/high_mean": 0.0006391222013917286, "clip_ratio/low_mean": 0.00039702067215330317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001036142877637758, "epoch": 10.597084548104956, "grad_norm": 0.1819458305835724, "learning_rate": 1e-06, "loss": -0.0761, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1121651785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 930.4992065429688, "completions/mean_terminated_length": 530.5836181640625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 10.606413994169095, "grad_norm": 0.23714500665664673, "learning_rate": 1e-06, "loss": -0.0278, "num_tokens": 630550704.0, "reward": 0.6835938096046448, "reward_std": 0.1386522501707077, "rewards/simpleverify_reward/mean": 0.68359375, "rewards/simpleverify_reward/std": 0.46513837575912476, "step": 1105 }, { "clip_ratio/high_max": 0.002079588550259359, "clip_ratio/high_mean": 0.000724632375749934, "clip_ratio/low_mean": 0.0003017038227426383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001026336218274082, "epoch": 10.615743440233237, "grad_norm": 0.21084706485271454, "learning_rate": 1e-06, "loss": -0.0838, "step": 1106 }, { "clip_ratio/high_max": 0.0025324898815597408, "clip_ratio/high_mean": 0.0008938673563534394, "clip_ratio/low_mean": 0.0003434873633523239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001237354754266562, "epoch": 10.625072886297376, "grad_norm": 0.6478009223937988, "learning_rate": 1e-06, "loss": -0.0608, "step": 1107 }, { "clip_ratio/high_max": 0.0019812576138065197, "clip_ratio/high_mean": 0.0007094501343090087, "clip_ratio/low_mean": 0.00043829474088852294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011477448933874257, "epoch": 10.634402332361516, "grad_norm": 0.19106754660606384, "learning_rate": 1e-06, "loss": -0.0404, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1121651785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 945.5851440429688, "completions/mean_terminated_length": 547.575439453125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 10.643731778425655, "grad_norm": 0.21422149240970612, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 632647329.0, "reward": 0.6777344346046448, "reward_std": 0.1235387921333313, "rewards/simpleverify_reward/mean": 0.677734375, "rewards/simpleverify_reward/std": 0.46740928292274475, "step": 1109 }, { "clip_ratio/high_max": 0.0017790565798350144, "clip_ratio/high_mean": 0.0006647348400292685, "clip_ratio/low_mean": 0.0004041415045321628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010688763541111257, "epoch": 10.653061224489797, "grad_norm": 13.308205604553223, "learning_rate": 1e-06, "loss": -0.0441, "step": 1110 }, { "clip_ratio/high_max": 0.001662448117713211, "clip_ratio/high_mean": 0.0006773781760784914, "clip_ratio/low_mean": 0.0002969532642964623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009743314112711232, "epoch": 10.662390670553936, "grad_norm": 0.19844625890254974, "learning_rate": 1e-06, "loss": -0.0543, "step": 1111 }, { "clip_ratio/high_max": 0.002335883713385556, "clip_ratio/high_mean": 0.0008139452402247116, "clip_ratio/low_mean": 0.00024156338122338639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010555086355452659, "epoch": 10.671720116618076, "grad_norm": 0.19574064016342163, "learning_rate": 1e-06, "loss": -0.0563, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 901.3499145507812, "completions/mean_terminated_length": 524.6880493164062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.681049562682215, "grad_norm": 0.2548856735229492, "learning_rate": 1e-06, "loss": -0.0676, "num_tokens": 634688759.0, "reward": 0.6713169813156128, "reward_std": 0.12482420355081558, "rewards/simpleverify_reward/mean": 0.6713169813156128, "rewards/simpleverify_reward/std": 0.46980005502700806, "step": 1113 }, { "clip_ratio/high_max": 0.001909071364934789, "clip_ratio/high_mean": 0.0006688388621114427, "clip_ratio/low_mean": 0.00037324181016629154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010420806484034983, "epoch": 10.690379008746355, "grad_norm": 0.20548781752586365, "learning_rate": 1e-06, "loss": -0.0537, "step": 1114 }, { "clip_ratio/high_max": 0.001730348521959968, "clip_ratio/high_mean": 0.0005997706539346837, "clip_ratio/low_mean": 0.00034001357380475383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000939784207730554, "epoch": 10.699708454810496, "grad_norm": 0.2911704480648041, "learning_rate": 1e-06, "loss": -0.08, "step": 1115 }, { "clip_ratio/high_max": 0.0017525884868518915, "clip_ratio/high_mean": 0.0005882920222575194, "clip_ratio/low_mean": 0.0004579514325087075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010462434856890468, "epoch": 10.709037900874636, "grad_norm": 0.2061307579278946, "learning_rate": 1e-06, "loss": -0.0262, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 951.1861572265625, "completions/mean_terminated_length": 569.4002075195312, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.718367346938775, "grad_norm": 0.22230473160743713, "learning_rate": 1e-06, "loss": -0.0738, "num_tokens": 636860738.0, "reward": 0.6727120876312256, "reward_std": 0.14487037062644958, "rewards/simpleverify_reward/mean": 0.6727120280265808, "rewards/simpleverify_reward/std": 0.46928882598876953, "step": 1117 }, { "clip_ratio/high_max": 0.0018886330472014379, "clip_ratio/high_mean": 0.0006578785096280626, "clip_ratio/low_mean": 0.0003701359228216461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010280144269927405, "epoch": 10.727696793002915, "grad_norm": 0.2045716792345047, "learning_rate": 1e-06, "loss": -0.0311, "step": 1118 }, { "clip_ratio/high_max": 0.001970694796909811, "clip_ratio/high_mean": 0.0006845843572591548, "clip_ratio/low_mean": 0.0005025309442316939, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011871152964886278, "epoch": 10.737026239067056, "grad_norm": 0.19867418706417084, "learning_rate": 1e-06, "loss": -0.0637, "step": 1119 }, { "clip_ratio/high_max": 0.0018884613909904147, "clip_ratio/high_mean": 0.0006933932077117788, "clip_ratio/low_mean": 0.00044236291910237924, "clip_ratio/low_min": 2.991862129420042e-05, "clip_ratio/region_mean": 0.0011357561179465847, "epoch": 10.746355685131196, "grad_norm": 0.2314470410346985, "learning_rate": 1e-06, "loss": -0.0557, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3922.0, "completions/mean_length": 970.9386596679688, "completions/mean_terminated_length": 551.6265869140625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.755685131195335, "grad_norm": 0.25781917572021484, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 638968134.0, "reward": 0.6517857313156128, "reward_std": 0.1375451385974884, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47647082805633545, "step": 1121 }, { "clip_ratio/high_max": 0.00195054363575764, "clip_ratio/high_mean": 0.0008022249712666962, "clip_ratio/low_mean": 0.000305215229445821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011074401973019121, "epoch": 10.765014577259475, "grad_norm": 0.19178429245948792, "learning_rate": 1e-06, "loss": -0.0873, "step": 1122 }, { "clip_ratio/high_max": 0.0019126111947116442, "clip_ratio/high_mean": 0.000665628889692016, "clip_ratio/low_mean": 0.00038178192926352494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010474108239577617, "epoch": 10.774344023323614, "grad_norm": 0.2289872169494629, "learning_rate": 1e-06, "loss": -0.022, "step": 1123 }, { "clip_ratio/high_max": 0.0016885034674487542, "clip_ratio/high_mean": 0.0006636865236941958, "clip_ratio/low_mean": 0.00043145664312760346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010951431759167463, "epoch": 10.783673469387756, "grad_norm": 0.30223163962364197, "learning_rate": 1e-06, "loss": -0.0365, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1096540178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 931.3516235351562, "completions/mean_terminated_length": 541.5969848632812, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 10.793002915451895, "grad_norm": 0.23876233398914337, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 641062842.0, "reward": 0.6576451063156128, "reward_std": 0.13844870030879974, "rewards/simpleverify_reward/mean": 0.6576451063156128, "rewards/simpleverify_reward/std": 0.474563866853714, "step": 1125 }, { "clip_ratio/high_max": 0.0018462353727954905, "clip_ratio/high_mean": 0.0007324269372475101, "clip_ratio/low_mean": 0.0003612125274230493, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010936394755844958, "epoch": 10.802332361516035, "grad_norm": 0.3835388422012329, "learning_rate": 1e-06, "loss": -0.0662, "step": 1126 }, { "clip_ratio/high_max": 0.002207749232184142, "clip_ratio/high_mean": 0.0008236218545789598, "clip_ratio/low_mean": 0.00041035387766896747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001233975744980853, "epoch": 10.811661807580174, "grad_norm": 0.34456756711006165, "learning_rate": 1e-06, "loss": -0.0195, "step": 1127 }, { "clip_ratio/high_max": 0.0020284763086237945, "clip_ratio/high_mean": 0.0007700658698013285, "clip_ratio/low_mean": 0.0004911719724987051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012612378450285178, "epoch": 10.820991253644316, "grad_norm": 0.22815994918346405, "learning_rate": 1e-06, "loss": -0.0308, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1010044642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 888.5276489257812, "completions/mean_terminated_length": 528.1598510742188, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 10.830320699708455, "grad_norm": 0.2458016574382782, "learning_rate": 1e-06, "loss": -0.0709, "num_tokens": 643113205.0, "reward": 0.6830357313156128, "reward_std": 0.15252605080604553, "rewards/simpleverify_reward/mean": 0.6830357313156128, "rewards/simpleverify_reward/std": 0.46535831689834595, "step": 1129 }, { "clip_ratio/high_max": 0.001884718396468088, "clip_ratio/high_mean": 0.0007275825337274, "clip_ratio/low_mean": 0.00035965480583399767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010872373240999877, "epoch": 10.839650145772595, "grad_norm": 0.22774936258792877, "learning_rate": 1e-06, "loss": -0.0834, "step": 1130 }, { "clip_ratio/high_max": 0.002036425026744837, "clip_ratio/high_mean": 0.0007056854883558117, "clip_ratio/low_mean": 0.0004546713105355593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011603567872953136, "epoch": 10.848979591836734, "grad_norm": 0.28384846448898315, "learning_rate": 1e-06, "loss": -0.0263, "step": 1131 }, { "clip_ratio/high_max": 0.00209779938813881, "clip_ratio/high_mean": 0.0008959144934124197, "clip_ratio/low_mean": 0.0004379177898954367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013338322860363405, "epoch": 10.858309037900874, "grad_norm": 0.21785777807235718, "learning_rate": 1e-06, "loss": -0.0692, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 912.634521484375, "completions/mean_terminated_length": 535.081787109375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.867638483965015, "grad_norm": 0.22755523025989532, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 645173311.0, "reward": 0.6944754719734192, "reward_std": 0.12248320877552032, "rewards/simpleverify_reward/mean": 0.6944754719734192, "rewards/simpleverify_reward/std": 0.46069350838661194, "step": 1133 }, { "clip_ratio/high_max": 0.0014948458556318656, "clip_ratio/high_mean": 0.0005626316433335887, "clip_ratio/low_mean": 0.0004491192648856668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010117508718394674, "epoch": 10.876967930029155, "grad_norm": 0.21762646734714508, "learning_rate": 1e-06, "loss": -0.0063, "step": 1134 }, { "clip_ratio/high_max": 0.0016774241594248451, "clip_ratio/high_mean": 0.0006283397378865629, "clip_ratio/low_mean": 0.0003375227101969358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009658624585426878, "epoch": 10.886297376093294, "grad_norm": 1.501908302307129, "learning_rate": 1e-06, "loss": -0.0428, "step": 1135 }, { "clip_ratio/high_max": 0.0017362074177071918, "clip_ratio/high_mean": 0.0006436579278670251, "clip_ratio/low_mean": 0.00029669874220417114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009403566727996804, "epoch": 10.895626822157434, "grad_norm": 0.36548808217048645, "learning_rate": 1e-06, "loss": -0.0673, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1090959821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 925.687255859375, "completions/mean_terminated_length": 537.4653930664062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 10.904956268221575, "grad_norm": 0.23635277152061462, "learning_rate": 1e-06, "loss": -0.0715, "num_tokens": 647246382.0, "reward": 0.6774553656578064, "reward_std": 0.14183902740478516, "rewards/simpleverify_reward/mean": 0.6774553656578064, "rewards/simpleverify_reward/std": 0.46751531958580017, "step": 1137 }, { "clip_ratio/high_max": 0.0018572387707536109, "clip_ratio/high_mean": 0.0007617279788973974, "clip_ratio/low_mean": 0.0005211290290390025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012828570070269052, "epoch": 10.914285714285715, "grad_norm": 0.3456311821937561, "learning_rate": 1e-06, "loss": -0.0375, "step": 1138 }, { "clip_ratio/high_max": 0.002557222222094424, "clip_ratio/high_mean": 0.0009409029735252261, "clip_ratio/low_mean": 0.0003902337361978425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013311367183632683, "epoch": 10.923615160349854, "grad_norm": 0.1941908895969391, "learning_rate": 1e-06, "loss": -0.0886, "step": 1139 }, { "clip_ratio/high_max": 0.0021762069409305695, "clip_ratio/high_mean": 0.0008063262939685956, "clip_ratio/low_mean": 0.0006432084010157268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014495347168121953, "epoch": 10.932944606413994, "grad_norm": 0.33589014410972595, "learning_rate": 1e-06, "loss": 0.0192, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1110491071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 975.4344482421875, "completions/mean_terminated_length": 585.6085815429688, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 10.942274052478133, "grad_norm": 0.1906060427427292, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 649465883.0, "reward": 0.6233259439468384, "reward_std": 0.13947807252407074, "rewards/simpleverify_reward/mean": 0.6233258843421936, "rewards/simpleverify_reward/std": 0.48461970686912537, "step": 1141 }, { "clip_ratio/high_max": 0.001770864357240498, "clip_ratio/high_mean": 0.0007027601823210716, "clip_ratio/low_mean": 0.000412215691540041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011149758374813246, "epoch": 10.951603498542275, "grad_norm": 0.23571066558361053, "learning_rate": 1e-06, "loss": -0.0642, "step": 1142 }, { "clip_ratio/high_max": 0.0016884820324776229, "clip_ratio/high_mean": 0.0006424428775062552, "clip_ratio/low_mean": 0.0004550187732093036, "clip_ratio/low_min": 1.605445686436724e-05, "clip_ratio/region_mean": 0.0010974616270686965, "epoch": 10.960932944606414, "grad_norm": 0.2323278784751892, "learning_rate": 1e-06, "loss": -0.0896, "step": 1143 }, { "clip_ratio/high_max": 0.002017040882492438, "clip_ratio/high_mean": 0.0006456600658566458, "clip_ratio/low_mean": 0.0005117703285577591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011574303898669314, "epoch": 10.970262390670554, "grad_norm": 13.536758422851562, "learning_rate": 1e-06, "loss": -0.0347, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 998.19873046875, "completions/mean_terminated_length": 571.3904418945312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.00932944606414, "grad_norm": 0.2660578787326813, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 651618451.0, "reward": 0.6623884439468384, "reward_std": 0.14298579096794128, "rewards/simpleverify_reward/mean": 0.6623883843421936, "rewards/simpleverify_reward/std": 0.47296130657196045, "step": 1145 }, { "clip_ratio/high_max": 0.0019262420246377587, "clip_ratio/high_mean": 0.0006326842794805998, "clip_ratio/low_mean": 0.00034974225241057866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009824265471252147, "epoch": 11.018658892128279, "grad_norm": 0.19772863388061523, "learning_rate": 1e-06, "loss": -0.0461, "step": 1146 }, { "clip_ratio/high_max": 0.002086080094159115, "clip_ratio/high_mean": 0.0007780600171827246, "clip_ratio/low_mean": 0.00036154304029878404, "clip_ratio/low_min": 9.568278983351775e-06, "clip_ratio/region_mean": 0.0011396030131436419, "epoch": 11.02798833819242, "grad_norm": 0.25732144713401794, "learning_rate": 1e-06, "loss": -0.0519, "step": 1147 }, { "clip_ratio/high_max": 0.002057760073512327, "clip_ratio/high_mean": 0.0007742320758552523, "clip_ratio/low_mean": 0.0003781663344852859, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011523984012455912, "epoch": 11.03731778425656, "grad_norm": 0.22632527351379395, "learning_rate": 1e-06, "loss": -0.0574, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1010044642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 916.5142822265625, "completions/mean_terminated_length": 559.2908325195312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.0466472303207, "grad_norm": 0.25528112053871155, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 653781822.0, "reward": 0.6648995876312256, "reward_std": 0.1497739851474762, "rewards/simpleverify_reward/mean": 0.6648995280265808, "rewards/simpleverify_reward/std": 0.4720914363861084, "step": 1149 }, { "clip_ratio/high_max": 0.002316030873771524, "clip_ratio/high_mean": 0.0007730625839030836, "clip_ratio/low_mean": 0.00040950472612166777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001182567299110815, "epoch": 11.055976676384839, "grad_norm": 0.2386631816625595, "learning_rate": 1e-06, "loss": -0.061, "step": 1150 }, { "clip_ratio/high_max": 0.0018812222697306424, "clip_ratio/high_mean": 0.0007851537175156409, "clip_ratio/low_mean": 0.0004146012756791606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011997550063824747, "epoch": 11.06530612244898, "grad_norm": 0.23401418328285217, "learning_rate": 1e-06, "loss": -0.0659, "step": 1151 }, { "clip_ratio/high_max": 0.0018388854296063073, "clip_ratio/high_mean": 0.0007553278610430425, "clip_ratio/low_mean": 0.0003976170351052133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001152944900240982, "epoch": 11.07463556851312, "grad_norm": 3.5208704471588135, "learning_rate": 1e-06, "loss": -0.0724, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 931.86279296875, "completions/mean_terminated_length": 529.8779907226562, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 11.08396501457726, "grad_norm": 0.18501169979572296, "learning_rate": 1e-06, "loss": -0.0536, "num_tokens": 655830186.0, "reward": 0.6531808376312256, "reward_std": 0.12630003690719604, "rewards/simpleverify_reward/mean": 0.6531807780265808, "rewards/simpleverify_reward/std": 0.4760240316390991, "step": 1153 }, { "clip_ratio/high_max": 0.0015219873057503719, "clip_ratio/high_mean": 0.000563051082281163, "clip_ratio/low_mean": 0.00039648726442464977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009595383598934859, "epoch": 11.093294460641399, "grad_norm": 0.2915429174900055, "learning_rate": 1e-06, "loss": -0.0421, "step": 1154 }, { "clip_ratio/high_max": 0.0019647673761937767, "clip_ratio/high_mean": 0.0007468629910363234, "clip_ratio/low_mean": 0.0003412500359445403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010881130037887488, "epoch": 11.102623906705539, "grad_norm": 0.21821662783622742, "learning_rate": 1e-06, "loss": -0.0606, "step": 1155 }, { "clip_ratio/high_max": 0.001756072857460822, "clip_ratio/high_mean": 0.0005888136292924173, "clip_ratio/low_mean": 0.00046800334439467406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001056816987329512, "epoch": 11.11195335276968, "grad_norm": 145.55953979492188, "learning_rate": 1e-06, "loss": -0.0468, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1314174107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3731.0, "completions/mean_length": 1017.66357421875, "completions/mean_terminated_length": 551.9081420898438, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 11.12128279883382, "grad_norm": 0.21798564493656158, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 657907180.0, "reward": 0.6487165689468384, "reward_std": 0.13007645308971405, "rewards/simpleverify_reward/mean": 0.6487165093421936, "rewards/simpleverify_reward/std": 0.47743797302246094, "step": 1157 }, { "clip_ratio/high_max": 0.001837852549215313, "clip_ratio/high_mean": 0.0007148277909436729, "clip_ratio/low_mean": 0.0002415750041109277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009564027895976324, "epoch": 11.130612244897959, "grad_norm": 0.19328831136226654, "learning_rate": 1e-06, "loss": -0.0705, "step": 1158 }, { "clip_ratio/high_max": 0.0018421797394694295, "clip_ratio/high_mean": 0.0006077241819184565, "clip_ratio/low_mean": 0.00042473405937926145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010324582690373063, "epoch": 11.139941690962099, "grad_norm": 0.25739210844039917, "learning_rate": 1e-06, "loss": 0.0035, "step": 1159 }, { "clip_ratio/high_max": 0.002074034928227775, "clip_ratio/high_mean": 0.0006794317996536847, "clip_ratio/low_mean": 0.0003764828320527158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010559146758168936, "epoch": 11.14927113702624, "grad_norm": 0.21593590080738068, "learning_rate": 1e-06, "loss": -0.0561, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1088169642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 900.2500610351562, "completions/mean_terminated_length": 510.0363464355469, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 11.15860058309038, "grad_norm": 0.22161923348903656, "learning_rate": 1e-06, "loss": -0.0921, "num_tokens": 659888100.0, "reward": 0.691964328289032, "reward_std": 0.14042788743972778, "rewards/simpleverify_reward/mean": 0.6919642686843872, "rewards/simpleverify_reward/std": 0.46174582839012146, "step": 1161 }, { "clip_ratio/high_max": 0.0017984925761993509, "clip_ratio/high_mean": 0.0007200952022685669, "clip_ratio/low_mean": 0.0004505320725911588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011706272707669996, "epoch": 11.167930029154519, "grad_norm": 0.2710535228252411, "learning_rate": 1e-06, "loss": -0.0323, "step": 1162 }, { "clip_ratio/high_max": 0.002120373053912772, "clip_ratio/high_mean": 0.0007169705786509439, "clip_ratio/low_mean": 0.0003894864532867359, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011064570280723274, "epoch": 11.177259475218658, "grad_norm": 0.21402309834957123, "learning_rate": 1e-06, "loss": -0.0864, "step": 1163 }, { "clip_ratio/high_max": 0.0017268488591071218, "clip_ratio/high_mean": 0.0006394960873876698, "clip_ratio/low_mean": 0.00042368770118628163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010631837940309197, "epoch": 11.186588921282798, "grad_norm": 0.26626667380332947, "learning_rate": 1e-06, "loss": -0.0632, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1079799107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 923.8446044921875, "completions/mean_terminated_length": 539.85205078125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 11.19591836734694, "grad_norm": 0.2382846623659134, "learning_rate": 1e-06, "loss": -0.0172, "num_tokens": 661972039.0, "reward": 0.6727120876312256, "reward_std": 0.14032761752605438, "rewards/simpleverify_reward/mean": 0.6727120280265808, "rewards/simpleverify_reward/std": 0.46928879618644714, "step": 1165 }, { "clip_ratio/high_max": 0.002044300257693976, "clip_ratio/high_mean": 0.0007501553100155434, "clip_ratio/low_mean": 0.0003330205827296595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010831758900167188, "epoch": 11.205247813411079, "grad_norm": 0.20358645915985107, "learning_rate": 1e-06, "loss": -0.0786, "step": 1166 }, { "clip_ratio/high_max": 0.0015472250925085973, "clip_ratio/high_mean": 0.0006088471764087444, "clip_ratio/low_mean": 0.00039900146248328383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010078486156999134, "epoch": 11.214577259475218, "grad_norm": 0.21963578462600708, "learning_rate": 1e-06, "loss": -0.041, "step": 1167 }, { "clip_ratio/high_max": 0.0023490974126616493, "clip_ratio/high_mean": 0.0008148546075972263, "clip_ratio/low_mean": 0.0004063307587784948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012211853827466257, "epoch": 11.223906705539358, "grad_norm": 0.21174179017543793, "learning_rate": 1e-06, "loss": -0.0568, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1163504464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 947.4656982421875, "completions/mean_terminated_length": 532.8970947265625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.2332361516035, "grad_norm": 0.2265067994594574, "learning_rate": 1e-06, "loss": -0.0781, "num_tokens": 664035956.0, "reward": 0.693917453289032, "reward_std": 0.13215479254722595, "rewards/simpleverify_reward/mean": 0.6939173936843872, "rewards/simpleverify_reward/std": 0.460928738117218, "step": 1169 }, { "clip_ratio/high_max": 0.002076767530525103, "clip_ratio/high_mean": 0.0007552662091256934, "clip_ratio/low_mean": 0.00037307001593944733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011283362218819093, "epoch": 11.242565597667639, "grad_norm": 0.2170780897140503, "learning_rate": 1e-06, "loss": -0.0416, "step": 1170 }, { "clip_ratio/high_max": 0.0018340502501814626, "clip_ratio/high_mean": 0.0006826469907537103, "clip_ratio/low_mean": 0.0002866722898033913, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009693192951090168, "epoch": 11.251895043731778, "grad_norm": 0.2131679654121399, "learning_rate": 1e-06, "loss": -0.0781, "step": 1171 }, { "clip_ratio/high_max": 0.0022895872352819424, "clip_ratio/high_mean": 0.000757409702600853, "clip_ratio/low_mean": 0.00041467531127636903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011720850052370224, "epoch": 11.261224489795918, "grad_norm": 0.22461941838264465, "learning_rate": 1e-06, "loss": -0.0416, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1135602678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3305.0, "completions/mean_length": 904.8206176757812, "completions/mean_terminated_length": 496.00408935546875, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 11.270553935860057, "grad_norm": 0.23261193931102753, "learning_rate": 1e-06, "loss": -0.0714, "num_tokens": 665969481.0, "reward": 0.6713169813156128, "reward_std": 0.14235344529151917, "rewards/simpleverify_reward/mean": 0.6713169813156128, "rewards/simpleverify_reward/std": 0.46980005502700806, "step": 1173 }, { "clip_ratio/high_max": 0.001884893801616272, "clip_ratio/high_mean": 0.0008093069482129067, "clip_ratio/low_mean": 0.0004758702939398063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012851772480644286, "epoch": 11.279883381924199, "grad_norm": 0.24927107989788055, "learning_rate": 1e-06, "loss": -0.0767, "step": 1174 }, { "clip_ratio/high_max": 0.002178678427299019, "clip_ratio/high_mean": 0.0007701385902691982, "clip_ratio/low_mean": 0.0004794289449137068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012495675546233542, "epoch": 11.289212827988338, "grad_norm": 0.22305598855018616, "learning_rate": 1e-06, "loss": -0.0583, "step": 1175 }, { "clip_ratio/high_max": 0.001627208745048847, "clip_ratio/high_mean": 0.0006353894395942916, "clip_ratio/low_mean": 0.0004066005449203658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010419899845146574, "epoch": 11.298542274052478, "grad_norm": 0.5935447812080383, "learning_rate": 1e-06, "loss": -0.0362, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 902.5558471679688, "completions/mean_terminated_length": 519.3424682617188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.307871720116617, "grad_norm": 0.2550044059753418, "learning_rate": 1e-06, "loss": -0.0418, "num_tokens": 667974681.0, "reward": 0.695870578289032, "reward_std": 0.12608478963375092, "rewards/simpleverify_reward/mean": 0.6958705186843872, "rewards/simpleverify_reward/std": 0.4601019620895386, "step": 1177 }, { "clip_ratio/high_max": 0.0022932347710593604, "clip_ratio/high_mean": 0.0007057837046886561, "clip_ratio/low_mean": 0.0004975666474820173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012033503226120956, "epoch": 11.317201166180759, "grad_norm": 0.3246391713619232, "learning_rate": 1e-06, "loss": -0.024, "step": 1178 }, { "clip_ratio/high_max": 0.0018343565679970197, "clip_ratio/high_mean": 0.0007236095389089314, "clip_ratio/low_mean": 0.00038269146853053826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011063010060752276, "epoch": 11.326530612244898, "grad_norm": 0.3980807363986969, "learning_rate": 1e-06, "loss": -0.0524, "step": 1179 }, { "clip_ratio/high_max": 0.0027491214095789474, "clip_ratio/high_mean": 0.0008531142557330895, "clip_ratio/low_mean": 0.00044170410501465085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012948183903063182, "epoch": 11.335860058309038, "grad_norm": 0.22702981531620026, "learning_rate": 1e-06, "loss": -0.0456, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.123046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 977.6222534179688, "completions/mean_terminated_length": 540.0769653320312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.345189504373177, "grad_norm": 0.21815049648284912, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 670036887.0, "reward": 0.6565290689468384, "reward_std": 0.14386743307113647, "rewards/simpleverify_reward/mean": 0.6565290093421936, "rewards/simpleverify_reward/std": 0.4749332368373871, "step": 1181 }, { "clip_ratio/high_max": 0.0023980786791071296, "clip_ratio/high_mean": 0.0008312934460263932, "clip_ratio/low_mean": 0.000424421325988078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012557147529150825, "epoch": 11.354518950437317, "grad_norm": 0.24279053509235382, "learning_rate": 1e-06, "loss": -0.0502, "step": 1182 }, { "clip_ratio/high_max": 0.0019682301062857732, "clip_ratio/high_mean": 0.0007944326534925494, "clip_ratio/low_mean": 0.0003296024624432903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011240351268497761, "epoch": 11.363848396501458, "grad_norm": 0.20387424528598785, "learning_rate": 1e-06, "loss": -0.0864, "step": 1183 }, { "clip_ratio/high_max": 0.0019713773508556187, "clip_ratio/high_mean": 0.0007444394077538163, "clip_ratio/low_mean": 0.000545293660252355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012897330743726343, "epoch": 11.373177842565598, "grad_norm": 0.2980876564979553, "learning_rate": 1e-06, "loss": -0.0429, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 977.99755859375, "completions/mean_terminated_length": 528.017578125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.382507288629737, "grad_norm": 0.21087335050106049, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 672045982.0, "reward": 0.660714328289032, "reward_std": 0.13850362598896027, "rewards/simpleverify_reward/mean": 0.6607142686843872, "rewards/simpleverify_reward/std": 0.4735329747200012, "step": 1185 }, { "clip_ratio/high_max": 0.0019762677293329034, "clip_ratio/high_mean": 0.0007292888185475022, "clip_ratio/low_mean": 0.000363272903541656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010925617170869373, "epoch": 11.391836734693877, "grad_norm": 0.24355439841747284, "learning_rate": 1e-06, "loss": -0.07, "step": 1186 }, { "clip_ratio/high_max": 0.0018814944487530738, "clip_ratio/high_mean": 0.00064841027779039, "clip_ratio/low_mean": 0.00028354379196571244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009319540622527711, "epoch": 11.401166180758018, "grad_norm": 0.2341708242893219, "learning_rate": 1e-06, "loss": -0.0729, "step": 1187 }, { "clip_ratio/high_max": 0.0018693080201046541, "clip_ratio/high_mean": 0.0006966649925743695, "clip_ratio/low_mean": 0.0003979288067057496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010945938156510238, "epoch": 11.410495626822158, "grad_norm": 0.24351142346858978, "learning_rate": 1e-06, "loss": -0.0513, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1157924107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 958.4063110351562, "completions/mean_terminated_length": 547.5191040039062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.419825072886297, "grad_norm": 0.25501641631126404, "learning_rate": 1e-06, "loss": -0.0977, "num_tokens": 674124670.0, "reward": 0.672433078289032, "reward_std": 0.13786324858665466, "rewards/simpleverify_reward/mean": 0.6724330186843872, "rewards/simpleverify_reward/std": 0.469391405582428, "step": 1189 }, { "clip_ratio/high_max": 0.0015676323346269783, "clip_ratio/high_mean": 0.0006958685544304899, "clip_ratio/low_mean": 0.00039667392854880745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010925424830929842, "epoch": 11.429154518950437, "grad_norm": 0.29325780272483826, "learning_rate": 1e-06, "loss": -0.0211, "step": 1190 }, { "clip_ratio/high_max": 0.00194908058620058, "clip_ratio/high_mean": 0.0007372027575911488, "clip_ratio/low_mean": 0.000464748182821495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001201950912218308, "epoch": 11.438483965014576, "grad_norm": 0.2387930154800415, "learning_rate": 1e-06, "loss": -0.0379, "step": 1191 }, { "clip_ratio/high_max": 0.0015870848401391413, "clip_ratio/high_mean": 0.0006174709333208739, "clip_ratio/low_mean": 0.0005806199869766715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011980909148405772, "epoch": 11.447813411078718, "grad_norm": 0.26432734727859497, "learning_rate": 1e-06, "loss": -0.0246, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.126953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 959.9844360351562, "completions/mean_terminated_length": 503.9642333984375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 11.457142857142857, "grad_norm": 0.2390488088130951, "learning_rate": 1e-06, "loss": -0.0471, "num_tokens": 676016670.0, "reward": 0.6741071939468384, "reward_std": 0.1323675811290741, "rewards/simpleverify_reward/mean": 0.6741071343421936, "rewards/simpleverify_reward/std": 0.46877285838127136, "step": 1193 }, { "clip_ratio/high_max": 0.0018411080491205212, "clip_ratio/high_mean": 0.0006838567041995702, "clip_ratio/low_mean": 0.00037486863084268407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010587253182166023, "epoch": 11.466472303206997, "grad_norm": 1.060755729675293, "learning_rate": 1e-06, "loss": -0.0662, "step": 1194 }, { "clip_ratio/high_max": 0.0016800486628198996, "clip_ratio/high_mean": 0.0006654414173681289, "clip_ratio/low_mean": 0.0004042981095153664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010697395155148115, "epoch": 11.475801749271136, "grad_norm": 0.2166784256696701, "learning_rate": 1e-06, "loss": -0.0329, "step": 1195 }, { "clip_ratio/high_max": 0.001569077547173947, "clip_ratio/high_mean": 0.0005932325866524479, "clip_ratio/low_mean": 0.00043117350594457093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010244060831610113, "epoch": 11.485131195335278, "grad_norm": 0.2661961019039154, "learning_rate": 1e-06, "loss": -0.0594, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1046316964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 906.81396484375, "completions/mean_terminated_length": 534.1293334960938, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 11.494460641399417, "grad_norm": 0.2402757853269577, "learning_rate": 1e-06, "loss": -0.0851, "num_tokens": 678089547.0, "reward": 0.662667453289032, "reward_std": 0.14366576075553894, "rewards/simpleverify_reward/mean": 0.6626673936843872, "rewards/simpleverify_reward/std": 0.47286540269851685, "step": 1197 }, { "clip_ratio/high_max": 0.001849315518484218, "clip_ratio/high_mean": 0.0006886883220431628, "clip_ratio/low_mean": 0.0003595154062168149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010482037578185555, "epoch": 11.503790087463557, "grad_norm": 0.2320890575647354, "learning_rate": 1e-06, "loss": -0.049, "step": 1198 }, { "clip_ratio/high_max": 0.0017499229179520626, "clip_ratio/high_mean": 0.0007414824704028433, "clip_ratio/low_mean": 0.0004050747825203871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011465572242741473, "epoch": 11.513119533527696, "grad_norm": 0.2285231202840805, "learning_rate": 1e-06, "loss": -0.0617, "step": 1199 }, { "clip_ratio/high_max": 0.0019740462594199926, "clip_ratio/high_mean": 0.0007934259665489662, "clip_ratio/low_mean": 0.0004467637140805891, "clip_ratio/low_min": 3.165358430123888e-05, "clip_ratio/region_mean": 0.0012401896783558186, "epoch": 11.522448979591836, "grad_norm": 0.24287335574626923, "learning_rate": 1e-06, "loss": -0.0475, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1330915178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 1033.128662109375, "completions/mean_terminated_length": 562.9034423828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 11.531778425655977, "grad_norm": 0.2578519284725189, "learning_rate": 1e-06, "loss": -0.0953, "num_tokens": 680207376.0, "reward": 0.6590402126312256, "reward_std": 0.13429374992847443, "rewards/simpleverify_reward/mean": 0.6590401530265808, "rewards/simpleverify_reward/std": 0.47409799695014954, "step": 1201 }, { "clip_ratio/high_max": 0.0016841028409544379, "clip_ratio/high_mean": 0.0007443894828611519, "clip_ratio/low_mean": 0.0003967875618400285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011411770574341062, "epoch": 11.541107871720117, "grad_norm": 0.23179516196250916, "learning_rate": 1e-06, "loss": -0.0718, "step": 1202 }, { "clip_ratio/high_max": 0.0016796643103589304, "clip_ratio/high_mean": 0.0006977123957767617, "clip_ratio/low_mean": 0.0005389084471971728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012366208175080828, "epoch": 11.550437317784256, "grad_norm": 1.0140125751495361, "learning_rate": 1e-06, "loss": -0.0486, "step": 1203 }, { "clip_ratio/high_max": 0.001794734111172147, "clip_ratio/high_mean": 0.0007042584875307512, "clip_ratio/low_mean": 0.0005693412163054745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012735997152049094, "epoch": 11.559766763848396, "grad_norm": 0.22755128145217896, "learning_rate": 1e-06, "loss": -0.041, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 928.5910034179688, "completions/mean_terminated_length": 530.6752319335938, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.569096209912537, "grad_norm": 0.22911261022090912, "learning_rate": 1e-06, "loss": -0.0292, "num_tokens": 682272214.0, "reward": 0.6755022406578064, "reward_std": 0.12393363565206528, "rewards/simpleverify_reward/mean": 0.6755022406578064, "rewards/simpleverify_reward/std": 0.4682522118091583, "step": 1205 }, { "clip_ratio/high_max": 0.0020964210780221038, "clip_ratio/high_mean": 0.0007948264928927529, "clip_ratio/low_mean": 0.0003052474385185633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011000739123119274, "epoch": 11.578425655976677, "grad_norm": 0.21966910362243652, "learning_rate": 1e-06, "loss": -0.0808, "step": 1206 }, { "clip_ratio/high_max": 0.0020548733409668785, "clip_ratio/high_mean": 0.0007264740197570063, "clip_ratio/low_mean": 0.000458666338090552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011851403542095795, "epoch": 11.587755102040816, "grad_norm": 0.3267291784286499, "learning_rate": 1e-06, "loss": -0.0481, "step": 1207 }, { "clip_ratio/high_max": 0.0017627425368118566, "clip_ratio/high_mean": 0.0006927621689101215, "clip_ratio/low_mean": 0.000563006402899191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001255768547707703, "epoch": 11.597084548104956, "grad_norm": 0.7842003703117371, "learning_rate": 1e-06, "loss": -0.0295, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1051897321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 922.1710815429688, "completions/mean_terminated_length": 549.0704345703125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.606413994169095, "grad_norm": 0.2007438987493515, "learning_rate": 1e-06, "loss": -0.0568, "num_tokens": 684400003.0, "reward": 0.6780134439468384, "reward_std": 0.13139788806438446, "rewards/simpleverify_reward/mean": 0.6780133843421936, "rewards/simpleverify_reward/std": 0.467303067445755, "step": 1209 }, { "clip_ratio/high_max": 0.0017339064106636215, "clip_ratio/high_mean": 0.0005766396334365709, "clip_ratio/low_mean": 0.00039060537119439687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009672449978097575, "epoch": 11.615743440233237, "grad_norm": 20.795166015625, "learning_rate": 1e-06, "loss": -0.0436, "step": 1210 }, { "clip_ratio/high_max": 0.001998539752094075, "clip_ratio/high_mean": 0.0006802816442359472, "clip_ratio/low_mean": 0.00032386259499617154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010041442365036346, "epoch": 11.625072886297376, "grad_norm": 0.2117414027452469, "learning_rate": 1e-06, "loss": -0.0566, "step": 1211 }, { "clip_ratio/high_max": 0.0022375775915861595, "clip_ratio/high_mean": 0.0007431976082443725, "clip_ratio/low_mean": 0.0005393989827098267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001282596593227936, "epoch": 11.634402332361516, "grad_norm": 0.26438140869140625, "learning_rate": 1e-06, "loss": -0.0394, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1333705357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 1018.5086669921875, "completions/mean_terminated_length": 544.8960571289062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 11.643731778425655, "grad_norm": 0.24019083380699158, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 686434674.0, "reward": 0.6693638563156128, "reward_std": 0.1524939090013504, "rewards/simpleverify_reward/mean": 0.6693638563156128, "rewards/simpleverify_reward/std": 0.4705078601837158, "step": 1213 }, { "clip_ratio/high_max": 0.0021831078265677206, "clip_ratio/high_mean": 0.0008550636339350604, "clip_ratio/low_mean": 0.0004700737270013633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013251373675302602, "epoch": 11.653061224489797, "grad_norm": 0.3391661047935486, "learning_rate": 1e-06, "loss": -0.0803, "step": 1214 }, { "clip_ratio/high_max": 0.0020499260863289237, "clip_ratio/high_mean": 0.0008020752084121341, "clip_ratio/low_mean": 0.000410157361329766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001212232582474826, "epoch": 11.662390670553936, "grad_norm": 0.23344071209430695, "learning_rate": 1e-06, "loss": -0.0471, "step": 1215 }, { "clip_ratio/high_max": 0.002294759702635929, "clip_ratio/high_mean": 0.000910094768187264, "clip_ratio/low_mean": 0.0006070247018215014, "clip_ratio/low_min": 2.2045855075703003e-05, "clip_ratio/region_mean": 0.0015171194754657336, "epoch": 11.671720116618076, "grad_norm": 0.2733834981918335, "learning_rate": 1e-06, "loss": -0.0748, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1185825892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 971.32373046875, "completions/mean_terminated_length": 550.9414672851562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 11.681049562682215, "grad_norm": 0.24143916368484497, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 688542130.0, "reward": 0.6495535969734192, "reward_std": 0.13918136060237885, "rewards/simpleverify_reward/mean": 0.6495535969734192, "rewards/simpleverify_reward/std": 0.47717633843421936, "step": 1217 }, { "clip_ratio/high_max": 0.0015135040957829915, "clip_ratio/high_mean": 0.0005649326694765477, "clip_ratio/low_mean": 0.00037792730472574476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009428599933016812, "epoch": 11.690379008746355, "grad_norm": 0.2609913647174835, "learning_rate": 1e-06, "loss": -0.0386, "step": 1218 }, { "clip_ratio/high_max": 0.0019706044004124124, "clip_ratio/high_mean": 0.0007794085850036936, "clip_ratio/low_mean": 0.0004498509943005047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012292595747567248, "epoch": 11.699708454810496, "grad_norm": 0.27026495337486267, "learning_rate": 1e-06, "loss": -0.0844, "step": 1219 }, { "clip_ratio/high_max": 0.0016357317326765042, "clip_ratio/high_mean": 0.0006598753843718441, "clip_ratio/low_mean": 0.0004774851036017935, "clip_ratio/low_min": 2.7839643735205755e-05, "clip_ratio/region_mean": 0.001137360497523332, "epoch": 11.709037900874636, "grad_norm": 0.7214836478233337, "learning_rate": 1e-06, "loss": -0.0334, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1135602678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 956.6133422851562, "completions/mean_terminated_length": 554.4318237304688, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 11.718367346938775, "grad_norm": 0.2346077412366867, "learning_rate": 1e-06, "loss": -0.0677, "num_tokens": 690671144.0, "reward": 0.6381138563156128, "reward_std": 0.12498386949300766, "rewards/simpleverify_reward/mean": 0.6381138563156128, "rewards/simpleverify_reward/std": 0.48061317205429077, "step": 1221 }, { "clip_ratio/high_max": 0.0017573612349224277, "clip_ratio/high_mean": 0.0006102740971982712, "clip_ratio/low_mean": 0.00027743848499994783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008877125928847818, "epoch": 11.727696793002915, "grad_norm": 0.21073271334171295, "learning_rate": 1e-06, "loss": -0.0668, "step": 1222 }, { "clip_ratio/high_max": 0.001849101208790671, "clip_ratio/high_mean": 0.0006310919197858311, "clip_ratio/low_mean": 0.0004165632799413288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001047655186994234, "epoch": 11.737026239067056, "grad_norm": 3.2627639770507812, "learning_rate": 1e-06, "loss": -0.0254, "step": 1223 }, { "clip_ratio/high_max": 0.00179421218126663, "clip_ratio/high_mean": 0.000532350697540096, "clip_ratio/low_mean": 0.0004519227650234825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009842734543781262, "epoch": 11.746355685131196, "grad_norm": 0.1906217783689499, "learning_rate": 1e-06, "loss": -0.0313, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1163504464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 936.1582641601562, "completions/mean_terminated_length": 520.1007690429688, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 11.755685131195335, "grad_norm": 0.2822015583515167, "learning_rate": 1e-06, "loss": -0.0585, "num_tokens": 692671967.0, "reward": 0.6635044813156128, "reward_std": 0.14463520050048828, "rewards/simpleverify_reward/mean": 0.6635044813156128, "rewards/simpleverify_reward/std": 0.4725765585899353, "step": 1225 }, { "clip_ratio/high_max": 0.0020905911515001208, "clip_ratio/high_mean": 0.000794445786596043, "clip_ratio/low_mean": 0.0003816087419181713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011760545457946137, "epoch": 11.765014577259475, "grad_norm": 0.23929756879806519, "learning_rate": 1e-06, "loss": -0.0533, "step": 1226 }, { "clip_ratio/high_max": 0.0020159636369498912, "clip_ratio/high_mean": 0.000747657770261867, "clip_ratio/low_mean": 0.00039630204116747336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011439598238212056, "epoch": 11.774344023323614, "grad_norm": 0.19693991541862488, "learning_rate": 1e-06, "loss": -0.063, "step": 1227 }, { "clip_ratio/high_max": 0.0019639405218185857, "clip_ratio/high_mean": 0.0007902413162810262, "clip_ratio/low_mean": 0.0005316206525094458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013218619860708714, "epoch": 11.783673469387756, "grad_norm": 0.23446951806545258, "learning_rate": 1e-06, "loss": -0.0647, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 993.505615234375, "completions/mean_terminated_length": 541.2237548828125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.793002915451895, "grad_norm": 0.25716808438301086, "learning_rate": 1e-06, "loss": -0.0888, "num_tokens": 694712683.0, "reward": 0.6813616156578064, "reward_std": 0.13661260902881622, "rewards/simpleverify_reward/mean": 0.6813616156578064, "rewards/simpleverify_reward/std": 0.46601349115371704, "step": 1229 }, { "clip_ratio/high_max": 0.0017572257911524503, "clip_ratio/high_mean": 0.0006532784373121103, "clip_ratio/low_mean": 0.00036160992613076814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010148883702640887, "epoch": 11.802332361516035, "grad_norm": 2.3302762508392334, "learning_rate": 1e-06, "loss": -0.0365, "step": 1230 }, { "clip_ratio/high_max": 0.001766176657838514, "clip_ratio/high_mean": 0.0006897444482092396, "clip_ratio/low_mean": 0.0003608240454013867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010505684822419425, "epoch": 11.811661807580174, "grad_norm": 0.2042463719844818, "learning_rate": 1e-06, "loss": -0.0528, "step": 1231 }, { "clip_ratio/high_max": 0.0020641683222493157, "clip_ratio/high_mean": 0.0008259652877313783, "clip_ratio/low_mean": 0.00033659326527413214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011625585648289416, "epoch": 11.820991253644316, "grad_norm": 0.24184617400169373, "learning_rate": 1e-06, "loss": -0.0995, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1088169642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 930.044677734375, "completions/mean_terminated_length": 543.468994140625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 11.830320699708455, "grad_norm": 0.23845639824867249, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 696814971.0, "reward": 0.6570870876312256, "reward_std": 0.13125130534172058, "rewards/simpleverify_reward/mean": 0.6570870280265808, "rewards/simpleverify_reward/std": 0.4747489392757416, "step": 1233 }, { "clip_ratio/high_max": 0.001983990710868966, "clip_ratio/high_mean": 0.0007280048794200411, "clip_ratio/low_mean": 0.0003390373663023638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010670422379917, "epoch": 11.839650145772595, "grad_norm": 0.22825616598129272, "learning_rate": 1e-06, "loss": -0.0689, "step": 1234 }, { "clip_ratio/high_max": 0.0020249644658179022, "clip_ratio/high_mean": 0.0006729262704538996, "clip_ratio/low_mean": 0.00038392779970308766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010568540637905244, "epoch": 11.848979591836734, "grad_norm": 0.22084516286849976, "learning_rate": 1e-06, "loss": -0.0265, "step": 1235 }, { "clip_ratio/high_max": 0.002084510590066202, "clip_ratio/high_mean": 0.0007263058705575531, "clip_ratio/low_mean": 0.0003523071259223798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010786130042106379, "epoch": 11.858309037900874, "grad_norm": 0.22283309698104858, "learning_rate": 1e-06, "loss": -0.0529, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 964.331787109375, "completions/mean_terminated_length": 544.1332397460938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.867638483965015, "grad_norm": 0.2626735270023346, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 698906144.0, "reward": 0.6755022406578064, "reward_std": 0.1448434591293335, "rewards/simpleverify_reward/mean": 0.6755022406578064, "rewards/simpleverify_reward/std": 0.4682522118091583, "step": 1237 }, { "clip_ratio/high_max": 0.002163731351174647, "clip_ratio/high_mean": 0.0007386386878351914, "clip_ratio/low_mean": 0.0003764933421734895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001115132015911513, "epoch": 11.876967930029155, "grad_norm": 0.2505413293838501, "learning_rate": 1e-06, "loss": -0.0732, "step": 1238 }, { "clip_ratio/high_max": 0.0017819832937675528, "clip_ratio/high_mean": 0.0006597545188924414, "clip_ratio/low_mean": 0.00034642337277546176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001006177903036587, "epoch": 11.886297376093294, "grad_norm": 0.2106020748615265, "learning_rate": 1e-06, "loss": -0.0739, "step": 1239 }, { "clip_ratio/high_max": 0.0016499762514285976, "clip_ratio/high_mean": 0.0006468926592333446, "clip_ratio/low_mean": 0.00042715538279480825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001074048026566743, "epoch": 11.895626822157434, "grad_norm": 0.2647061347961426, "learning_rate": 1e-06, "loss": -0.0016, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1263950892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 1006.804443359375, "completions/mean_terminated_length": 559.852783203125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 11.904956268221575, "grad_norm": 0.24664278328418732, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 701021699.0, "reward": 0.648995578289032, "reward_std": 0.12855355441570282, "rewards/simpleverify_reward/mean": 0.6489955186843872, "rewards/simpleverify_reward/std": 0.4773509204387665, "step": 1241 }, { "clip_ratio/high_max": 0.001549923970742384, "clip_ratio/high_mean": 0.0006029294963809662, "clip_ratio/low_mean": 0.00035818565856970963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009611151654098649, "epoch": 11.914285714285715, "grad_norm": 0.23569589853286743, "learning_rate": 1e-06, "loss": -0.0488, "step": 1242 }, { "clip_ratio/high_max": 0.0015603531901433598, "clip_ratio/high_mean": 0.0005722887835872825, "clip_ratio/low_mean": 0.00048463346138305496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010569222577032633, "epoch": 11.923615160349854, "grad_norm": 0.30251407623291016, "learning_rate": 1e-06, "loss": -0.0445, "step": 1243 }, { "clip_ratio/high_max": 0.0015316918725147843, "clip_ratio/high_mean": 0.0005907043250772404, "clip_ratio/low_mean": 0.00048603586674289545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010767401690827683, "epoch": 11.932944606413994, "grad_norm": 0.1989983767271042, "learning_rate": 1e-06, "loss": -0.0332, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1277901785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 1004.0201416015625, "completions/mean_terminated_length": 551.0044555664062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.942274052478133, "grad_norm": 0.2930147647857666, "learning_rate": 1e-06, "loss": -0.0422, "num_tokens": 703095723.0, "reward": 0.6788504719734192, "reward_std": 0.13374054431915283, "rewards/simpleverify_reward/mean": 0.6788504719734192, "rewards/simpleverify_reward/std": 0.46698322892189026, "step": 1245 }, { "clip_ratio/high_max": 0.0015560597239527851, "clip_ratio/high_mean": 0.0005549377028728486, "clip_ratio/low_mean": 0.0003729725467565004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009279102541768225, "epoch": 11.951603498542275, "grad_norm": 0.2511458396911621, "learning_rate": 1e-06, "loss": -0.0864, "step": 1246 }, { "clip_ratio/high_max": 0.0017752261155692395, "clip_ratio/high_mean": 0.00062733000049775, "clip_ratio/low_mean": 0.0004911806245218031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011185106232005637, "epoch": 11.960932944606414, "grad_norm": 0.24846087396144867, "learning_rate": 1e-06, "loss": -0.0476, "step": 1247 }, { "clip_ratio/high_max": 0.0017881247513287235, "clip_ratio/high_mean": 0.0006702216742269229, "clip_ratio/low_mean": 0.0004059281604895659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010761498197098263, "epoch": 11.970262390670554, "grad_norm": 0.3307588994503021, "learning_rate": 1e-06, "loss": -0.0734, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1124441964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3721.0, "completions/mean_length": 929.8499145507812, "completions/mean_terminated_length": 528.731201171875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.00932944606414, "grad_norm": 0.2231827676296234, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 705132881.0, "reward": 0.684151828289032, "reward_std": 0.12965616583824158, "rewards/simpleverify_reward/mean": 0.6841517686843872, "rewards/simpleverify_reward/std": 0.4649176597595215, "step": 1249 }, { "clip_ratio/high_max": 0.002398605847702129, "clip_ratio/high_mean": 0.0008292603088193573, "clip_ratio/low_mean": 0.00039873685000202386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012279971670068335, "epoch": 12.018658892128279, "grad_norm": 0.40304437279701233, "learning_rate": 1e-06, "loss": -0.0446, "step": 1250 }, { "clip_ratio/high_max": 0.0020612555454135872, "clip_ratio/high_mean": 0.0007979273032105993, "clip_ratio/low_mean": 0.0003986096053267829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011965369194513187, "epoch": 12.02798833819242, "grad_norm": 0.26291176676750183, "learning_rate": 1e-06, "loss": -0.0453, "step": 1251 }, { "clip_ratio/high_max": 0.0019109571185254026, "clip_ratio/high_mean": 0.0007763011581118917, "clip_ratio/low_mean": 0.00043063551265731803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001206936671223957, "epoch": 12.03731778425656, "grad_norm": 0.6168349385261536, "learning_rate": 1e-06, "loss": -0.0542, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 962.1593627929688, "completions/mean_terminated_length": 530.3856811523438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 12.0466472303207, "grad_norm": 0.2506181299686432, "learning_rate": 1e-06, "loss": -0.0415, "num_tokens": 707153268.0, "reward": 0.684151828289032, "reward_std": 0.1397693157196045, "rewards/simpleverify_reward/mean": 0.6841517686843872, "rewards/simpleverify_reward/std": 0.4649176597595215, "step": 1253 }, { "clip_ratio/high_max": 0.0019077740762440953, "clip_ratio/high_mean": 0.0006904124393258826, "clip_ratio/low_mean": 0.00032799281098050415, "clip_ratio/low_min": 2.4210730771301314e-05, "clip_ratio/region_mean": 0.0010184052862314275, "epoch": 12.055976676384839, "grad_norm": 0.2330017238855362, "learning_rate": 1e-06, "loss": -0.0518, "step": 1254 }, { "clip_ratio/high_max": 0.002226164891908411, "clip_ratio/high_mean": 0.0007814583350409521, "clip_ratio/low_mean": 0.00036017872298543807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011416370471124537, "epoch": 12.06530612244898, "grad_norm": 0.26715797185897827, "learning_rate": 1e-06, "loss": -0.071, "step": 1255 }, { "clip_ratio/high_max": 0.0017615488068258855, "clip_ratio/high_mean": 0.0007265892272698693, "clip_ratio/low_mean": 0.0004092145370577782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011358037481841166, "epoch": 12.07463556851312, "grad_norm": 0.25384482741355896, "learning_rate": 1e-06, "loss": -0.0535, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1213727678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 985.5103759765625, "completions/mean_terminated_length": 555.830078125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 12.08396501457726, "grad_norm": 0.2425101101398468, "learning_rate": 1e-06, "loss": -0.0174, "num_tokens": 709264065.0, "reward": 0.6752232313156128, "reward_std": 0.1367669403553009, "rewards/simpleverify_reward/mean": 0.6752232313156128, "rewards/simpleverify_reward/std": 0.468356728553772, "step": 1257 }, { "clip_ratio/high_max": 0.0018617934438225348, "clip_ratio/high_mean": 0.0007325130518438527, "clip_ratio/low_mean": 0.0002862336975795188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010187467414652929, "epoch": 12.093294460641399, "grad_norm": 0.24479129910469055, "learning_rate": 1e-06, "loss": -0.063, "step": 1258 }, { "clip_ratio/high_max": 0.001642331866605673, "clip_ratio/high_mean": 0.0006830819365859497, "clip_ratio/low_mean": 0.00032791653347885585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010109984905284364, "epoch": 12.102623906705539, "grad_norm": 0.21644043922424316, "learning_rate": 1e-06, "loss": -0.0703, "step": 1259 }, { "clip_ratio/high_max": 0.0021325051529856864, "clip_ratio/high_mean": 0.00083203727081127, "clip_ratio/low_mean": 0.0002739735305112845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011060108299716376, "epoch": 12.11195335276968, "grad_norm": 0.2796313166618347, "learning_rate": 1e-06, "loss": -0.1008, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4009.0, "completions/mean_length": 982.4364013671875, "completions/mean_terminated_length": 573.5845947265625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 12.12128279883382, "grad_norm": 0.2292819321155548, "learning_rate": 1e-06, "loss": -0.0899, "num_tokens": 711446133.0, "reward": 0.6657366156578064, "reward_std": 0.13817116618156433, "rewards/simpleverify_reward/mean": 0.6657366156578064, "rewards/simpleverify_reward/std": 0.4717981219291687, "step": 1261 }, { "clip_ratio/high_max": 0.0020490833958319854, "clip_ratio/high_mean": 0.0007491680626117159, "clip_ratio/low_mean": 0.0003760690116223486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011252370532019995, "epoch": 12.130612244897959, "grad_norm": 1.071768045425415, "learning_rate": 1e-06, "loss": -0.0396, "step": 1262 }, { "clip_ratio/high_max": 0.001451378902856959, "clip_ratio/high_mean": 0.0005034734203945845, "clip_ratio/low_mean": 0.00023502964154431538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007385030712612206, "epoch": 12.139941690962099, "grad_norm": 0.27985697984695435, "learning_rate": 1e-06, "loss": -0.0606, "step": 1263 }, { "clip_ratio/high_max": 0.0016940483183134347, "clip_ratio/high_mean": 0.0006864805627628812, "clip_ratio/low_mean": 0.00040153921690944117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010880198060476687, "epoch": 12.14927113702624, "grad_norm": 0.29092323780059814, "learning_rate": 1e-06, "loss": -0.0548, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0990513392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 880.74560546875, "completions/mean_terminated_length": 527.2567138671875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.15860058309038, "grad_norm": 0.22867155075073242, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 713486005.0, "reward": 0.7209821939468384, "reward_std": 0.11470909416675568, "rewards/simpleverify_reward/mean": 0.7209821343421936, "rewards/simpleverify_reward/std": 0.4485788941383362, "step": 1265 }, { "clip_ratio/high_max": 0.0018663315022422466, "clip_ratio/high_mean": 0.0007363288086708053, "clip_ratio/low_mean": 0.00033780481248868455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010741336081991903, "epoch": 12.167930029154519, "grad_norm": 0.2466728836297989, "learning_rate": 1e-06, "loss": -0.071, "step": 1266 }, { "clip_ratio/high_max": 0.0019005324429599568, "clip_ratio/high_mean": 0.0007055781370581826, "clip_ratio/low_mean": 0.0003858792535993416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010914573867921717, "epoch": 12.177259475218658, "grad_norm": 0.3681514263153076, "learning_rate": 1e-06, "loss": -0.0248, "step": 1267 }, { "clip_ratio/high_max": 0.0017266688992094714, "clip_ratio/high_mean": 0.0006184405374369817, "clip_ratio/low_mean": 0.00024882792672542564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008672684580233181, "epoch": 12.186588921282798, "grad_norm": 0.25798606872558594, "learning_rate": 1e-06, "loss": -0.0761, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1163504464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 969.1582641601562, "completions/mean_terminated_length": 557.4458618164062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.19591836734694, "grad_norm": 0.2539548873901367, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 715603036.0, "reward": 0.670479953289032, "reward_std": 0.12338918447494507, "rewards/simpleverify_reward/mean": 0.6704798936843872, "rewards/simpleverify_reward/std": 0.47010454535484314, "step": 1269 }, { "clip_ratio/high_max": 0.0016551966728002299, "clip_ratio/high_mean": 0.0005415271698439028, "clip_ratio/low_mean": 0.00022397671409635223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007655038807570236, "epoch": 12.205247813411079, "grad_norm": 0.1858983039855957, "learning_rate": 1e-06, "loss": -0.0636, "step": 1270 }, { "clip_ratio/high_max": 0.0016392281286243815, "clip_ratio/high_mean": 0.0006024983695169794, "clip_ratio/low_mean": 0.00023868728590059618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008411856560996966, "epoch": 12.214577259475218, "grad_norm": 0.23073597252368927, "learning_rate": 1e-06, "loss": -0.0821, "step": 1271 }, { "clip_ratio/high_max": 0.0019250465447839815, "clip_ratio/high_mean": 0.0007205996644188417, "clip_ratio/low_mean": 0.00048626565285303514, "clip_ratio/low_min": 2.153687091777101e-05, "clip_ratio/region_mean": 0.0012068653377355076, "epoch": 12.223906705539358, "grad_norm": 0.19350990653038025, "learning_rate": 1e-06, "loss": -0.0322, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3221.0, "completions/mean_length": 982.5265502929688, "completions/mean_terminated_length": 546.7986450195312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 12.2332361516035, "grad_norm": 0.2386411428451538, "learning_rate": 1e-06, "loss": -0.0265, "num_tokens": 717668939.0, "reward": 0.6648995876312256, "reward_std": 0.13921605050563812, "rewards/simpleverify_reward/mean": 0.6648995280265808, "rewards/simpleverify_reward/std": 0.4720914363861084, "step": 1273 }, { "clip_ratio/high_max": 0.001724118679703679, "clip_ratio/high_mean": 0.0006421479829441523, "clip_ratio/low_mean": 0.00041053171116800513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010526796995691257, "epoch": 12.242565597667639, "grad_norm": 0.6863195896148682, "learning_rate": 1e-06, "loss": -0.0429, "step": 1274 }, { "clip_ratio/high_max": 0.0019318272097734734, "clip_ratio/high_mean": 0.0007584266750200186, "clip_ratio/low_mean": 0.00030508674421980686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010635134276526514, "epoch": 12.251895043731778, "grad_norm": 0.20440053939819336, "learning_rate": 1e-06, "loss": -0.0699, "step": 1275 }, { "clip_ratio/high_max": 0.0021591903278022073, "clip_ratio/high_mean": 0.000779032952777925, "clip_ratio/low_mean": 0.0005076886909591849, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012867216719314456, "epoch": 12.261224489795918, "grad_norm": 0.2042524516582489, "learning_rate": 1e-06, "loss": -0.0642, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 900.63037109375, "completions/mean_terminated_length": 552.6197509765625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.270553935860057, "grad_norm": 0.2379431277513504, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 719804550.0, "reward": 0.6830357313156128, "reward_std": 0.1313086450099945, "rewards/simpleverify_reward/mean": 0.6830357313156128, "rewards/simpleverify_reward/std": 0.46535831689834595, "step": 1277 }, { "clip_ratio/high_max": 0.0016225189647229854, "clip_ratio/high_mean": 0.0005903219316678587, "clip_ratio/low_mean": 0.000419357405462506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010096793266711757, "epoch": 12.279883381924199, "grad_norm": 0.22346104681491852, "learning_rate": 1e-06, "loss": -0.0177, "step": 1278 }, { "clip_ratio/high_max": 0.0019001064938493073, "clip_ratio/high_mean": 0.0007242753854370676, "clip_ratio/low_mean": 0.00032884184656722937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001053117241099244, "epoch": 12.289212827988338, "grad_norm": 0.26189884543418884, "learning_rate": 1e-06, "loss": -0.0254, "step": 1279 }, { "clip_ratio/high_max": 0.0019330107388668694, "clip_ratio/high_mean": 0.000704747822965146, "clip_ratio/low_mean": 0.000350712267390918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010554600776231382, "epoch": 12.298542274052478, "grad_norm": 0.24854397773742676, "learning_rate": 1e-06, "loss": -0.0895, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1043526785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 928.630615234375, "completions/mean_terminated_length": 559.5974731445312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.307871720116617, "grad_norm": 0.22335444390773773, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 721957242.0, "reward": 0.6774553656578064, "reward_std": 0.12520244717597961, "rewards/simpleverify_reward/mean": 0.6774553656578064, "rewards/simpleverify_reward/std": 0.46751531958580017, "step": 1281 }, { "clip_ratio/high_max": 0.0018389314791420475, "clip_ratio/high_mean": 0.0007260669590323232, "clip_ratio/low_mean": 0.00038050642569942283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011065734070143662, "epoch": 12.317201166180759, "grad_norm": 0.1905175894498825, "learning_rate": 1e-06, "loss": -0.0668, "step": 1282 }, { "clip_ratio/high_max": 0.0014693653320136946, "clip_ratio/high_mean": 0.0005437621935016068, "clip_ratio/low_mean": 0.00039417065590896527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000937932831220678, "epoch": 12.326530612244898, "grad_norm": 0.23832479119300842, "learning_rate": 1e-06, "loss": -0.0399, "step": 1283 }, { "clip_ratio/high_max": 0.001465594246838009, "clip_ratio/high_mean": 0.0005152727626409614, "clip_ratio/low_mean": 0.00041762627824937226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009328990199719556, "epoch": 12.335860058309038, "grad_norm": 0.22756369411945343, "learning_rate": 1e-06, "loss": -0.0382, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1236049107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 999.6473388671875, "completions/mean_terminated_length": 562.9442749023438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.345189504373177, "grad_norm": 0.21279095113277435, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 724080258.0, "reward": 0.6403459906578064, "reward_std": 0.12274950742721558, "rewards/simpleverify_reward/mean": 0.6403459906578064, "rewards/simpleverify_reward/std": 0.4799659252166748, "step": 1285 }, { "clip_ratio/high_max": 0.001572587121700053, "clip_ratio/high_mean": 0.000597658819060598, "clip_ratio/low_mean": 0.0004920489764117519, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010897077991103288, "epoch": 12.354518950437317, "grad_norm": 0.2955663502216339, "learning_rate": 1e-06, "loss": -0.0406, "step": 1286 }, { "clip_ratio/high_max": 0.00189039062752272, "clip_ratio/high_mean": 0.0006911153038799966, "clip_ratio/low_mean": 0.00045541083409261773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001146526136835746, "epoch": 12.363848396501458, "grad_norm": 0.21810047328472137, "learning_rate": 1e-06, "loss": -0.0634, "step": 1287 }, { "clip_ratio/high_max": 0.0017299803039350081, "clip_ratio/high_mean": 0.0005749617503170157, "clip_ratio/low_mean": 0.0005824574764119461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011574192067200784, "epoch": 12.373177842565598, "grad_norm": 0.4335756301879883, "learning_rate": 1e-06, "loss": -0.0251, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1255580357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 1029.2679443359375, "completions/mean_terminated_length": 588.9265747070312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 12.382507288629737, "grad_norm": 0.23153842985630035, "learning_rate": 1e-06, "loss": -0.0451, "num_tokens": 726272522.0, "reward": 0.6322544813156128, "reward_std": 0.13037002086639404, "rewards/simpleverify_reward/mean": 0.6322544813156128, "rewards/simpleverify_reward/std": 0.4822588860988617, "step": 1289 }, { "clip_ratio/high_max": 0.0016766552253102418, "clip_ratio/high_mean": 0.0006680502265226096, "clip_ratio/low_mean": 0.00040109413021127693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010691443476389395, "epoch": 12.391836734693877, "grad_norm": 0.20474453270435333, "learning_rate": 1e-06, "loss": -0.0806, "step": 1290 }, { "clip_ratio/high_max": 0.0019164115365128964, "clip_ratio/high_mean": 0.0006749793683411554, "clip_ratio/low_mean": 0.0003179405854325523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009929199404723477, "epoch": 12.401166180758018, "grad_norm": 0.208735391497612, "learning_rate": 1e-06, "loss": -0.0724, "step": 1291 }, { "clip_ratio/high_max": 0.0017588855480425991, "clip_ratio/high_mean": 0.0006403326487998129, "clip_ratio/low_mean": 0.00039401093749802385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00103434359516541, "epoch": 12.410495626822158, "grad_norm": 0.18221713602542877, "learning_rate": 1e-06, "loss": -0.0172, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 879.1883544921875, "completions/mean_terminated_length": 524.4222412109375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 12.419825072886297, "grad_norm": 0.21326975524425507, "learning_rate": 1e-06, "loss": -0.0487, "num_tokens": 728330221.0, "reward": 0.7067522406578064, "reward_std": 0.1360798180103302, "rewards/simpleverify_reward/mean": 0.7067522406578064, "rewards/simpleverify_reward/std": 0.4553145468235016, "step": 1293 }, { "clip_ratio/high_max": 0.0018630346530699171, "clip_ratio/high_mean": 0.0007202712049547699, "clip_ratio/low_mean": 0.000362049908062545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010823211414390244, "epoch": 12.429154518950437, "grad_norm": 0.21157598495483398, "learning_rate": 1e-06, "loss": -0.0679, "step": 1294 }, { "clip_ratio/high_max": 0.001872005290351808, "clip_ratio/high_mean": 0.0008696200275153387, "clip_ratio/low_mean": 0.00037996437731635524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012495844021032099, "epoch": 12.438483965014576, "grad_norm": 0.2253352701663971, "learning_rate": 1e-06, "loss": -0.0929, "step": 1295 }, { "clip_ratio/high_max": 0.0018210394682682818, "clip_ratio/high_mean": 0.0006900493081047898, "clip_ratio/low_mean": 0.00037122663161426317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010612759706418728, "epoch": 12.447813411078718, "grad_norm": 0.232676699757576, "learning_rate": 1e-06, "loss": -0.0537, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 938.894287109375, "completions/mean_terminated_length": 546.7318115234375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.457142857142857, "grad_norm": 0.239477276802063, "learning_rate": 1e-06, "loss": -0.0668, "num_tokens": 730441650.0, "reward": 0.680245578289032, "reward_std": 0.1408700942993164, "rewards/simpleverify_reward/mean": 0.6802455186843872, "rewards/simpleverify_reward/std": 0.4664463996887207, "step": 1297 }, { "clip_ratio/high_max": 0.0023558984175906517, "clip_ratio/high_mean": 0.0007626629085280001, "clip_ratio/low_mean": 0.0003728952228811977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011355581409588922, "epoch": 12.466472303206997, "grad_norm": 107.35285186767578, "learning_rate": 1e-06, "loss": -0.055, "step": 1298 }, { "clip_ratio/high_max": 0.0027310416699037887, "clip_ratio/high_mean": 0.0009972390362236183, "clip_ratio/low_mean": 0.0003484772166757466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013457162567647174, "epoch": 12.475801749271136, "grad_norm": 0.2552579641342163, "learning_rate": 1e-06, "loss": -0.0766, "step": 1299 }, { "clip_ratio/high_max": 0.0022728233379893936, "clip_ratio/high_mean": 0.0008005165964277694, "clip_ratio/low_mean": 0.00036768093013961334, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011681975338433404, "epoch": 12.485131195335278, "grad_norm": 0.37824690341949463, "learning_rate": 1e-06, "loss": -0.0611, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 969.179443359375, "completions/mean_terminated_length": 563.047607421875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.494460641399417, "grad_norm": 0.25493043661117554, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 732600701.0, "reward": 0.6696428656578064, "reward_std": 0.13009706139564514, "rewards/simpleverify_reward/mean": 0.6696428656578064, "rewards/simpleverify_reward/std": 0.47040730714797974, "step": 1301 }, { "clip_ratio/high_max": 0.0018461671934346668, "clip_ratio/high_mean": 0.0006261978960537817, "clip_ratio/low_mean": 0.00036300236240549566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009892002690321533, "epoch": 12.503790087463557, "grad_norm": 0.4876163601875305, "learning_rate": 1e-06, "loss": -0.0498, "step": 1302 }, { "clip_ratio/high_max": 0.0019461731717456132, "clip_ratio/high_mean": 0.0006932239321031375, "clip_ratio/low_mean": 0.0003960746544180438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010892985756072449, "epoch": 12.513119533527696, "grad_norm": 0.2224876880645752, "learning_rate": 1e-06, "loss": -0.0406, "step": 1303 }, { "clip_ratio/high_max": 0.0017853828649094794, "clip_ratio/high_mean": 0.0007110504066076828, "clip_ratio/low_mean": 0.0004603740271704737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011714244319591671, "epoch": 12.522448979591836, "grad_norm": 0.23488450050354004, "learning_rate": 1e-06, "loss": -0.0611, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.107421875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 932.59716796875, "completions/mean_terminated_length": 551.8812255859375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 12.531778425655977, "grad_norm": 0.24183179438114166, "learning_rate": 1e-06, "loss": -0.0979, "num_tokens": 734701009.0, "reward": 0.6696428656578064, "reward_std": 0.1416795253753662, "rewards/simpleverify_reward/mean": 0.6696428656578064, "rewards/simpleverify_reward/std": 0.47040730714797974, "step": 1305 }, { "clip_ratio/high_max": 0.001823642298404593, "clip_ratio/high_mean": 0.0007022723916634277, "clip_ratio/low_mean": 0.000442579053924419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011448514396761311, "epoch": 12.541107871720117, "grad_norm": 1.2548359632492065, "learning_rate": 1e-06, "loss": -0.0611, "step": 1306 }, { "clip_ratio/high_max": 0.0018860747477447148, "clip_ratio/high_mean": 0.000748317066609161, "clip_ratio/low_mean": 0.00036969806933484506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011180151559528895, "epoch": 12.550437317784256, "grad_norm": 0.20761752128601074, "learning_rate": 1e-06, "loss": -0.0522, "step": 1307 }, { "clip_ratio/high_max": 0.0016822202451294288, "clip_ratio/high_mean": 0.0006853677386970958, "clip_ratio/low_mean": 0.0005248292445685365, "clip_ratio/low_min": 3.724117414094508e-05, "clip_ratio/region_mean": 0.0012101969859941164, "epoch": 12.559766763848396, "grad_norm": 0.27934694290161133, "learning_rate": 1e-06, "loss": -0.0073, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 879.74560546875, "completions/mean_terminated_length": 533.8689575195312, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 12.569096209912537, "grad_norm": 0.26976892352104187, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 736794617.0, "reward": 0.6944754719734192, "reward_std": 0.1312851905822754, "rewards/simpleverify_reward/mean": 0.6944754719734192, "rewards/simpleverify_reward/std": 0.46069350838661194, "step": 1309 }, { "clip_ratio/high_max": 0.0018481198203517124, "clip_ratio/high_mean": 0.0006991917489358457, "clip_ratio/low_mean": 0.00046800800623714167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001167199730844004, "epoch": 12.578425655976677, "grad_norm": 0.3017846345901489, "learning_rate": 1e-06, "loss": -0.032, "step": 1310 }, { "clip_ratio/high_max": 0.0017495794782007579, "clip_ratio/high_mean": 0.0006113078261478222, "clip_ratio/low_mean": 0.000524272147231386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011355799870216288, "epoch": 12.587755102040816, "grad_norm": 0.28359663486480713, "learning_rate": 1e-06, "loss": -0.0452, "step": 1311 }, { "clip_ratio/high_max": 0.0017819429267547093, "clip_ratio/high_mean": 0.000639069794488023, "clip_ratio/low_mean": 0.0003933494490411249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001032419255352579, "epoch": 12.597084548104956, "grad_norm": 0.2505611777305603, "learning_rate": 1e-06, "loss": -0.0195, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1130022321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 946.6864013671875, "completions/mean_terminated_length": 545.4683837890625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.606413994169095, "grad_norm": 0.25851377844810486, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 738897565.0, "reward": 0.6506696939468384, "reward_std": 0.1531117558479309, "rewards/simpleverify_reward/mean": 0.6506696343421936, "rewards/simpleverify_reward/std": 0.4768250286579132, "step": 1313 }, { "clip_ratio/high_max": 0.002559328844654374, "clip_ratio/high_mean": 0.0009075291854969691, "clip_ratio/low_mean": 0.0004822192540814285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013897484786866698, "epoch": 12.615743440233237, "grad_norm": 0.25496387481689453, "learning_rate": 1e-06, "loss": -0.0846, "step": 1314 }, { "clip_ratio/high_max": 0.0017654788971412927, "clip_ratio/high_mean": 0.0007239379538077628, "clip_ratio/low_mean": 0.0003981178310823452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011220557826163713, "epoch": 12.625072886297376, "grad_norm": 0.2161339372396469, "learning_rate": 1e-06, "loss": -0.0119, "step": 1315 }, { "clip_ratio/high_max": 0.002187176847655792, "clip_ratio/high_mean": 0.0007699867765040835, "clip_ratio/low_mean": 0.0005363230084185489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001306309743085876, "epoch": 12.634402332361516, "grad_norm": 0.23278069496154785, "learning_rate": 1e-06, "loss": -0.0365, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 951.0689697265625, "completions/mean_terminated_length": 533.6002197265625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 12.643731778425655, "grad_norm": 0.2619081735610962, "learning_rate": 1e-06, "loss": -0.0527, "num_tokens": 740952076.0, "reward": 0.6679688096046448, "reward_std": 0.1302664428949356, "rewards/simpleverify_reward/mean": 0.66796875, "rewards/simpleverify_reward/std": 0.4710078537464142, "step": 1317 }, { "clip_ratio/high_max": 0.0015812250967428554, "clip_ratio/high_mean": 0.0006613955238208291, "clip_ratio/low_mean": 0.0004142019550954501, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010755974835774396, "epoch": 12.653061224489797, "grad_norm": 0.21849589049816132, "learning_rate": 1e-06, "loss": -0.0574, "step": 1318 }, { "clip_ratio/high_max": 0.001927997509483248, "clip_ratio/high_mean": 0.0007155969556151831, "clip_ratio/low_mean": 0.0003078950578583317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010234919991489733, "epoch": 12.662390670553936, "grad_norm": 0.19900375604629517, "learning_rate": 1e-06, "loss": -0.0838, "step": 1319 }, { "clip_ratio/high_max": 0.0019449072497081943, "clip_ratio/high_mean": 0.0007181679184213863, "clip_ratio/low_mean": 0.0003698528306586013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010880207482841797, "epoch": 12.671720116618076, "grad_norm": 0.2292470484972, "learning_rate": 1e-06, "loss": -0.0528, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1018415178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 906.76708984375, "completions/mean_terminated_length": 545.1422729492188, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 12.681049562682215, "grad_norm": 0.31547871232032776, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 743053241.0, "reward": 0.6936384439468384, "reward_std": 0.14445742964744568, "rewards/simpleverify_reward/mean": 0.6936383843421936, "rewards/simpleverify_reward/std": 0.4610460698604584, "step": 1321 }, { "clip_ratio/high_max": 0.0021243333867460024, "clip_ratio/high_mean": 0.0007707750919507816, "clip_ratio/low_mean": 0.0005002920474908024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012710671617242042, "epoch": 12.690379008746355, "grad_norm": 0.3447440266609192, "learning_rate": 1e-06, "loss": -0.0387, "step": 1322 }, { "clip_ratio/high_max": 0.001828438944357913, "clip_ratio/high_mean": 0.0007354973567998968, "clip_ratio/low_mean": 0.00041896207267200225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001154459412646247, "epoch": 12.699708454810496, "grad_norm": 0.2516617476940155, "learning_rate": 1e-06, "loss": -0.0316, "step": 1323 }, { "clip_ratio/high_max": 0.002056352430372499, "clip_ratio/high_mean": 0.0008982214676507283, "clip_ratio/low_mean": 0.0005282272322801873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014264486890169792, "epoch": 12.709037900874636, "grad_norm": 0.2673415243625641, "learning_rate": 1e-06, "loss": -0.0479, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1099330357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 955.833740234375, "completions/mean_terminated_length": 567.9887084960938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 12.718367346938775, "grad_norm": 0.2727755904197693, "learning_rate": 1e-06, "loss": -0.0301, "num_tokens": 745232277.0, "reward": 0.6291853189468384, "reward_std": 0.1503455489873886, "rewards/simpleverify_reward/mean": 0.6291852593421936, "rewards/simpleverify_reward/std": 0.483090341091156, "step": 1325 }, { "clip_ratio/high_max": 0.002059935482975561, "clip_ratio/high_mean": 0.0008465205282846, "clip_ratio/low_mean": 0.00040003404797062103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012465545441955328, "epoch": 12.727696793002915, "grad_norm": 0.2502662241458893, "learning_rate": 1e-06, "loss": -0.1071, "step": 1326 }, { "clip_ratio/high_max": 0.0020268320513423532, "clip_ratio/high_mean": 0.0008032614496187307, "clip_ratio/low_mean": 0.000520584694641002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013238461688160896, "epoch": 12.737026239067056, "grad_norm": 0.37645119428634644, "learning_rate": 1e-06, "loss": -0.0176, "step": 1327 }, { "clip_ratio/high_max": 0.0018176734665757976, "clip_ratio/high_mean": 0.0006451717326854123, "clip_ratio/low_mean": 0.0004869836579928233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011321553993184352, "epoch": 12.746355685131196, "grad_norm": 0.24082686007022858, "learning_rate": 1e-06, "loss": -0.0184, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 936.24951171875, "completions/mean_terminated_length": 534.822021484375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 12.755685131195335, "grad_norm": 0.2694149315357208, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 747275779.0, "reward": 0.6643415689468384, "reward_std": 0.12435756623744965, "rewards/simpleverify_reward/mean": 0.6643415093421936, "rewards/simpleverify_reward/std": 0.47228604555130005, "step": 1329 }, { "clip_ratio/high_max": 0.0018224549639853649, "clip_ratio/high_mean": 0.0007198817711469019, "clip_ratio/low_mean": 0.00044723340988639393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011671152060444001, "epoch": 12.765014577259475, "grad_norm": 0.6247207522392273, "learning_rate": 1e-06, "loss": -0.0529, "step": 1330 }, { "clip_ratio/high_max": 0.0018361405018367805, "clip_ratio/high_mean": 0.0007004160815995419, "clip_ratio/low_mean": 0.00036829385885539523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010687099311326165, "epoch": 12.774344023323614, "grad_norm": 0.23346687853336334, "learning_rate": 1e-06, "loss": -0.0551, "step": 1331 }, { "clip_ratio/high_max": 0.0019796898777713068, "clip_ratio/high_mean": 0.0006946107223484432, "clip_ratio/low_mean": 0.0005141870269653737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012087977556802798, "epoch": 12.783673469387756, "grad_norm": 0.23564288020133972, "learning_rate": 1e-06, "loss": -0.0318, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1085379464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 955.4422607421875, "completions/mean_terminated_length": 573.0707397460938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 12.793002915451895, "grad_norm": 0.24304738640785217, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 749495716.0, "reward": 0.65234375, "reward_std": 0.138991117477417, "rewards/simpleverify_reward/mean": 0.65234375, "rewards/simpleverify_reward/std": 0.4762926399707794, "step": 1333 }, { "clip_ratio/high_max": 0.0016322195951943286, "clip_ratio/high_mean": 0.0006877040668769041, "clip_ratio/low_mean": 0.0004419226843310753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011296267512079794, "epoch": 12.802332361516035, "grad_norm": 0.49920040369033813, "learning_rate": 1e-06, "loss": -0.0303, "step": 1334 }, { "clip_ratio/high_max": 0.0018872529690270312, "clip_ratio/high_mean": 0.000746536088627181, "clip_ratio/low_mean": 0.0003631767730212232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001109712859033607, "epoch": 12.811661807580174, "grad_norm": 0.35314661264419556, "learning_rate": 1e-06, "loss": -0.0914, "step": 1335 }, { "clip_ratio/high_max": 0.0019300323474453762, "clip_ratio/high_mean": 0.0007743179548924672, "clip_ratio/low_mean": 0.0005125927036715439, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012869106394646224, "epoch": 12.820991253644316, "grad_norm": 1.4999711513519287, "learning_rate": 1e-06, "loss": -0.0287, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 889.4063110351562, "completions/mean_terminated_length": 544.568603515625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 12.830320699708455, "grad_norm": 0.32108935713768005, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 751621692.0, "reward": 0.684151828289032, "reward_std": 0.1454431116580963, "rewards/simpleverify_reward/mean": 0.6841517686843872, "rewards/simpleverify_reward/std": 0.4649176597595215, "step": 1337 }, { "clip_ratio/high_max": 0.0020650434635172132, "clip_ratio/high_mean": 0.0007980941172718303, "clip_ratio/low_mean": 0.00045705372667725896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012551478612294886, "epoch": 12.839650145772595, "grad_norm": 0.23357625305652618, "learning_rate": 1e-06, "loss": -0.0334, "step": 1338 }, { "clip_ratio/high_max": 0.002242599490273278, "clip_ratio/high_mean": 0.000849088248287444, "clip_ratio/low_mean": 0.0006069744999877003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014560627314494923, "epoch": 12.848979591836734, "grad_norm": 0.26391199231147766, "learning_rate": 1e-06, "loss": -0.0212, "step": 1339 }, { "clip_ratio/high_max": 0.0024206695379689336, "clip_ratio/high_mean": 0.0008901808414520929, "clip_ratio/low_mean": 0.0006458638163167052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015360446705017239, "epoch": 12.858309037900874, "grad_norm": 0.2500108480453491, "learning_rate": 1e-06, "loss": -0.0501, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 926.6099853515625, "completions/mean_terminated_length": 594.4296875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.867638483965015, "grad_norm": 0.30966082215309143, "learning_rate": 1e-06, "loss": -0.0618, "num_tokens": 753921542.0, "reward": 0.6559709906578064, "reward_std": 0.13021866977214813, "rewards/simpleverify_reward/mean": 0.6559709906578064, "rewards/simpleverify_reward/std": 0.4751168489456177, "step": 1341 }, { "clip_ratio/high_max": 0.001251615431101527, "clip_ratio/high_mean": 0.00048412456635560375, "clip_ratio/low_mean": 0.0003877637591358507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008718883200344862, "epoch": 12.876967930029155, "grad_norm": 0.19917502999305725, "learning_rate": 1e-06, "loss": -0.0133, "step": 1342 }, { "clip_ratio/high_max": 0.0018528601394791622, "clip_ratio/high_mean": 0.0007360603176493896, "clip_ratio/low_mean": 0.0004451492359294207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011812095508503262, "epoch": 12.886297376093294, "grad_norm": 0.21850262582302094, "learning_rate": 1e-06, "loss": -0.0273, "step": 1343 }, { "clip_ratio/high_max": 0.0014295061700977385, "clip_ratio/high_mean": 0.0005612518971247482, "clip_ratio/low_mean": 0.0005259070312604308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010871589292946737, "epoch": 12.895626822157434, "grad_norm": 0.18033096194267273, "learning_rate": 1e-06, "loss": -0.0426, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0951450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 880.8426513671875, "completions/mean_terminated_length": 542.770263671875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 12.904956268221575, "grad_norm": 0.24492000043392181, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 756050114.0, "reward": 0.711495578289032, "reward_std": 0.12994582951068878, "rewards/simpleverify_reward/mean": 0.7114955186843872, "rewards/simpleverify_reward/std": 0.45313015580177307, "step": 1345 }, { "clip_ratio/high_max": 0.0021945409025647677, "clip_ratio/high_mean": 0.0008126678621920291, "clip_ratio/low_mean": 0.0004270888566679787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012397567006701138, "epoch": 12.914285714285715, "grad_norm": 0.368929922580719, "learning_rate": 1e-06, "loss": -0.0594, "step": 1346 }, { "clip_ratio/high_max": 0.001542039135529194, "clip_ratio/high_mean": 0.0005897565247323655, "clip_ratio/low_mean": 0.0003537330778726755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009434896228412981, "epoch": 12.923615160349854, "grad_norm": 0.2606196701526642, "learning_rate": 1e-06, "loss": -0.0352, "step": 1347 }, { "clip_ratio/high_max": 0.001642464441829361, "clip_ratio/high_mean": 0.0005797995800094213, "clip_ratio/low_mean": 0.0004160650014455314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009958645932783838, "epoch": 12.932944606413994, "grad_norm": 0.28718101978302, "learning_rate": 1e-06, "loss": -0.0305, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0931919642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 879.779052734375, "completions/mean_terminated_length": 549.2504272460938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 12.942274052478133, "grad_norm": 0.22267115116119385, "learning_rate": 1e-06, "loss": -0.0694, "num_tokens": 758194402.0, "reward": 0.6780134439468384, "reward_std": 0.13106472790241241, "rewards/simpleverify_reward/mean": 0.6780133843421936, "rewards/simpleverify_reward/std": 0.467303067445755, "step": 1349 }, { "clip_ratio/high_max": 0.0020010745793115348, "clip_ratio/high_mean": 0.0007203308832686162, "clip_ratio/low_mean": 0.0003323115392959153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001052642401191406, "epoch": 12.951603498542275, "grad_norm": 0.5825513005256653, "learning_rate": 1e-06, "loss": -0.026, "step": 1350 }, { "clip_ratio/high_max": 0.0016402942492277361, "clip_ratio/high_mean": 0.0006386227778421016, "clip_ratio/low_mean": 0.00037767936555610504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010163021433982067, "epoch": 12.960932944606414, "grad_norm": 0.24993054568767548, "learning_rate": 1e-06, "loss": -0.0441, "step": 1351 }, { "clip_ratio/high_max": 0.001411598022968974, "clip_ratio/high_mean": 0.0005116860793350497, "clip_ratio/low_mean": 0.0005118141384627961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010235002337140031, "epoch": 12.970262390670554, "grad_norm": 0.25922107696533203, "learning_rate": 1e-06, "loss": -0.013, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 983.327880859375, "completions/mean_terminated_length": 561.2037353515625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 13.00932944606414, "grad_norm": 0.21288909018039703, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 760311633.0, "reward": 0.654854953289032, "reward_std": 0.12755779922008514, "rewards/simpleverify_reward/mean": 0.6548548936843872, "rewards/simpleverify_reward/std": 0.47548189759254456, "step": 1353 }, { "clip_ratio/high_max": 0.0017635549702390563, "clip_ratio/high_mean": 0.0006714282280881889, "clip_ratio/low_mean": 0.00038624469334536116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001057672910974361, "epoch": 13.018658892128279, "grad_norm": 0.21035708487033844, "learning_rate": 1e-06, "loss": -0.0787, "step": 1354 }, { "clip_ratio/high_max": 0.0019247570453444496, "clip_ratio/high_mean": 0.0005956817512924317, "clip_ratio/low_mean": 0.0004972351875949244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010929169329756405, "epoch": 13.02798833819242, "grad_norm": 2.409979820251465, "learning_rate": 1e-06, "loss": -0.0551, "step": 1355 }, { "clip_ratio/high_max": 0.0017222318492713384, "clip_ratio/high_mean": 0.0005945315733697498, "clip_ratio/low_mean": 0.00041835067258944036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010128822468686849, "epoch": 13.03731778425656, "grad_norm": 0.21069835126399994, "learning_rate": 1e-06, "loss": -0.0465, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0887276785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 851.8295288085938, "completions/mean_terminated_length": 535.9549560546875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.0466472303207, "grad_norm": 0.2242615967988968, "learning_rate": 1e-06, "loss": -0.0564, "num_tokens": 762413366.0, "reward": 0.7109375596046448, "reward_std": 0.12303990870714188, "rewards/simpleverify_reward/mean": 0.7109375, "rewards/simpleverify_reward/std": 0.4533902704715729, "step": 1357 }, { "clip_ratio/high_max": 0.0017101167795772199, "clip_ratio/high_mean": 0.0006382317742463783, "clip_ratio/low_mean": 0.00039784024079381197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001036072015267564, "epoch": 13.055976676384839, "grad_norm": 0.5287723541259766, "learning_rate": 1e-06, "loss": -0.0307, "step": 1358 }, { "clip_ratio/high_max": 0.0018137655424652621, "clip_ratio/high_mean": 0.0005980109572192305, "clip_ratio/low_mean": 0.00038279367527138675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009808046288526384, "epoch": 13.06530612244898, "grad_norm": 0.46577081084251404, "learning_rate": 1e-06, "loss": -0.029, "step": 1359 }, { "clip_ratio/high_max": 0.0017212935163115617, "clip_ratio/high_mean": 0.0006515332388516981, "clip_ratio/low_mean": 0.0004280672624190629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010796005044539925, "epoch": 13.07463556851312, "grad_norm": 0.2109154760837555, "learning_rate": 1e-06, "loss": -0.0409, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0951450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 913.1649169921875, "completions/mean_terminated_length": 578.4912109375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.08396501457726, "grad_norm": 0.23666232824325562, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 764655765.0, "reward": 0.6941964626312256, "reward_std": 0.12869644165039062, "rewards/simpleverify_reward/mean": 0.6941964030265808, "rewards/simpleverify_reward/std": 0.46081122756004333, "step": 1361 }, { "clip_ratio/high_max": 0.0018724898145592306, "clip_ratio/high_mean": 0.0006769676856492879, "clip_ratio/low_mean": 0.0003563005366231664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010332682213629596, "epoch": 13.093294460641399, "grad_norm": 0.25531163811683655, "learning_rate": 1e-06, "loss": -0.035, "step": 1362 }, { "clip_ratio/high_max": 0.0016703742076060735, "clip_ratio/high_mean": 0.0006588136084246798, "clip_ratio/low_mean": 0.0003537131350412892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010125267472176347, "epoch": 13.102623906705539, "grad_norm": 0.8523982167243958, "learning_rate": 1e-06, "loss": -0.0628, "step": 1363 }, { "clip_ratio/high_max": 0.0016069326229626313, "clip_ratio/high_mean": 0.0006144889593997505, "clip_ratio/low_mean": 0.00037798475955241884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009924737278197426, "epoch": 13.11195335276968, "grad_norm": 0.2160680890083313, "learning_rate": 1e-06, "loss": -0.0195, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1135602678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 970.85888671875, "completions/mean_terminated_length": 570.5023803710938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.12128279883382, "grad_norm": 0.23190490901470184, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 766830715.0, "reward": 0.6696428656578064, "reward_std": 0.12190435081720352, "rewards/simpleverify_reward/mean": 0.6696428656578064, "rewards/simpleverify_reward/std": 0.47040730714797974, "step": 1365 }, { "clip_ratio/high_max": 0.0018416907732898835, "clip_ratio/high_mean": 0.0005891160690225661, "clip_ratio/low_mean": 0.00038452450917247916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009736405772855505, "epoch": 13.130612244897959, "grad_norm": 0.23464612662792206, "learning_rate": 1e-06, "loss": -0.0505, "step": 1366 }, { "clip_ratio/high_max": 0.001767993475368712, "clip_ratio/high_mean": 0.0006858417746116174, "clip_ratio/low_mean": 0.0003798578709393041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010656996601028368, "epoch": 13.139941690962099, "grad_norm": 0.9712325930595398, "learning_rate": 1e-06, "loss": -0.0536, "step": 1367 }, { "clip_ratio/high_max": 0.0018728045251918957, "clip_ratio/high_mean": 0.0006776517002435867, "clip_ratio/low_mean": 0.0005311410768626956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001208792775287293, "epoch": 13.14927113702624, "grad_norm": 0.2614913284778595, "learning_rate": 1e-06, "loss": -0.0067, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.095703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3296.0, "completions/mean_length": 889.2863159179688, "completions/mean_terminated_length": 549.914794921875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.15860058309038, "grad_norm": 0.261981338262558, "learning_rate": 1e-06, "loss": -0.0208, "num_tokens": 768992069.0, "reward": 0.690011203289032, "reward_std": 0.15065722167491913, "rewards/simpleverify_reward/mean": 0.6900111436843872, "rewards/simpleverify_reward/std": 0.4625532031059265, "step": 1369 }, { "clip_ratio/high_max": 0.0018522586142353248, "clip_ratio/high_mean": 0.0008177502659236779, "clip_ratio/low_mean": 0.0004908811615678133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001308631428400986, "epoch": 13.167930029154519, "grad_norm": 0.23061193525791168, "learning_rate": 1e-06, "loss": -0.0726, "step": 1370 }, { "clip_ratio/high_max": 0.0016696475067874417, "clip_ratio/high_mean": 0.0006482559019787004, "clip_ratio/low_mean": 0.00046842709843986086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011166830081492662, "epoch": 13.177259475218658, "grad_norm": 0.29461342096328735, "learning_rate": 1e-06, "loss": -0.0244, "step": 1371 }, { "clip_ratio/high_max": 0.002148230283637531, "clip_ratio/high_mean": 0.000955799925577594, "clip_ratio/low_mean": 0.0003515655166665965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013073654445179272, "epoch": 13.186588921282798, "grad_norm": 0.2291157841682434, "learning_rate": 1e-06, "loss": -0.1022, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1077008928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 939.5533447265625, "completions/mean_terminated_length": 558.5687866210938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.19591836734694, "grad_norm": 0.2633364498615265, "learning_rate": 1e-06, "loss": -0.0271, "num_tokens": 771147684.0, "reward": 0.6690848469734192, "reward_std": 0.15076038241386414, "rewards/simpleverify_reward/mean": 0.6690848469734192, "rewards/simpleverify_reward/std": 0.47060826420783997, "step": 1373 }, { "clip_ratio/high_max": 0.0020290983957238495, "clip_ratio/high_mean": 0.0007439805631292984, "clip_ratio/low_mean": 0.00043354210288271133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011775226703321096, "epoch": 13.205247813411079, "grad_norm": 0.2795291841030121, "learning_rate": 1e-06, "loss": -0.0368, "step": 1374 }, { "clip_ratio/high_max": 0.0019850371245411225, "clip_ratio/high_mean": 0.0007757081748422934, "clip_ratio/low_mean": 0.00035651048438012367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011322186728648376, "epoch": 13.214577259475218, "grad_norm": 0.22403988242149353, "learning_rate": 1e-06, "loss": -0.0737, "step": 1375 }, { "clip_ratio/high_max": 0.0021948249559500255, "clip_ratio/high_mean": 0.0007896017832536018, "clip_ratio/low_mean": 0.0005586083088928717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013482100839610212, "epoch": 13.223906705539358, "grad_norm": 0.2336522489786148, "learning_rate": 1e-06, "loss": -0.0371, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3722.0, "completions/mean_length": 883.3697509765625, "completions/mean_terminated_length": 564.0775756835938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 13.2332361516035, "grad_norm": 0.23512232303619385, "learning_rate": 1e-06, "loss": -0.0383, "num_tokens": 773344729.0, "reward": 0.666573703289032, "reward_std": 0.1294286698102951, "rewards/simpleverify_reward/mean": 0.6665736436843872, "rewards/simpleverify_reward/std": 0.4715031683444977, "step": 1377 }, { "clip_ratio/high_max": 0.0015956578354234807, "clip_ratio/high_mean": 0.0006420223980967421, "clip_ratio/low_mean": 0.0002924469949903141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009344693808088778, "epoch": 13.242565597667639, "grad_norm": 0.2456141710281372, "learning_rate": 1e-06, "loss": -0.0702, "step": 1378 }, { "clip_ratio/high_max": 0.0016567997045058291, "clip_ratio/high_mean": 0.0006539997084473725, "clip_ratio/low_mean": 0.0004228212874295423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010768209867819678, "epoch": 13.251895043731778, "grad_norm": 0.717322051525116, "learning_rate": 1e-06, "loss": 0.013, "step": 1379 }, { "clip_ratio/high_max": 0.0018727702954492997, "clip_ratio/high_mean": 0.0007158271037042141, "clip_ratio/low_mean": 0.0003062798691644275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010221069678664207, "epoch": 13.261224489795918, "grad_norm": 0.1752382516860962, "learning_rate": 1e-06, "loss": -0.0656, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0945870535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 861.316162109375, "completions/mean_terminated_length": 523.393798828125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 13.270553935860057, "grad_norm": 0.2035243809223175, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 775400270.0, "reward": 0.7020089626312256, "reward_std": 0.10921093076467514, "rewards/simpleverify_reward/mean": 0.7020089030265808, "rewards/simpleverify_reward/std": 0.4574393928050995, "step": 1381 }, { "clip_ratio/high_max": 0.002005413469305495, "clip_ratio/high_mean": 0.0005876733584955218, "clip_ratio/low_mean": 0.00031313798081100686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009008113211166346, "epoch": 13.279883381924199, "grad_norm": 0.23004940152168274, "learning_rate": 1e-06, "loss": -0.0613, "step": 1382 }, { "clip_ratio/high_max": 0.0014666777569800615, "clip_ratio/high_mean": 0.000512198464548419, "clip_ratio/low_mean": 0.00033425547917431686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008464539496344514, "epoch": 13.289212827988338, "grad_norm": 0.19602151215076447, "learning_rate": 1e-06, "loss": -0.0449, "step": 1383 }, { "clip_ratio/high_max": 0.001699587948678527, "clip_ratio/high_mean": 0.0006098105213823146, "clip_ratio/low_mean": 0.00039166922033473384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010014797444455326, "epoch": 13.298542274052478, "grad_norm": 0.21829290688037872, "learning_rate": 1e-06, "loss": -0.0609, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1057477678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 946.8811645507812, "completions/mean_terminated_length": 574.4891967773438, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 13.307871720116617, "grad_norm": 0.30542993545532227, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 777586596.0, "reward": 0.6487165689468384, "reward_std": 0.13695351779460907, "rewards/simpleverify_reward/mean": 0.6487165093421936, "rewards/simpleverify_reward/std": 0.47743794322013855, "step": 1385 }, { "clip_ratio/high_max": 0.0016583566612098366, "clip_ratio/high_mean": 0.0006422700780603918, "clip_ratio/low_mean": 0.0004086324561285437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010509025232749991, "epoch": 13.317201166180759, "grad_norm": 0.22802017629146576, "learning_rate": 1e-06, "loss": -0.0677, "step": 1386 }, { "clip_ratio/high_max": 0.0019478810536384117, "clip_ratio/high_mean": 0.0006334361951303435, "clip_ratio/low_mean": 0.00041980987134593306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010532460801186971, "epoch": 13.326530612244898, "grad_norm": 0.1890718787908554, "learning_rate": 1e-06, "loss": -0.0636, "step": 1387 }, { "clip_ratio/high_max": 0.0016981562657747418, "clip_ratio/high_mean": 0.0006733030445502663, "clip_ratio/low_mean": 0.0006004402225698868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012737432662106585, "epoch": 13.335860058309038, "grad_norm": 0.4280318319797516, "learning_rate": 1e-06, "loss": -0.0296, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3396.0, "completions/mean_length": 840.1554565429688, "completions/mean_terminated_length": 518.7630615234375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 13.345189504373177, "grad_norm": 0.29266786575317383, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 779624177.0, "reward": 0.7181919813156128, "reward_std": 0.12391340732574463, "rewards/simpleverify_reward/mean": 0.7181919813156128, "rewards/simpleverify_reward/std": 0.44994303584098816, "step": 1389 }, { "clip_ratio/high_max": 0.0019305573223391548, "clip_ratio/high_mean": 0.0007609481435792986, "clip_ratio/low_mean": 0.00026734622861113166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001028294354910031, "epoch": 13.354518950437317, "grad_norm": 0.2086375504732132, "learning_rate": 1e-06, "loss": -0.0605, "step": 1390 }, { "clip_ratio/high_max": 0.001969888835446909, "clip_ratio/high_mean": 0.0007721817382844165, "clip_ratio/low_mean": 0.0002465801029529757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010187618136114907, "epoch": 13.363848396501458, "grad_norm": 0.20755456387996674, "learning_rate": 1e-06, "loss": -0.0686, "step": 1391 }, { "clip_ratio/high_max": 0.001732702734443592, "clip_ratio/high_mean": 0.000596791913267225, "clip_ratio/low_mean": 0.00027064163793966145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008674335604155203, "epoch": 13.373177842565598, "grad_norm": 0.25254005193710327, "learning_rate": 1e-06, "loss": -0.0069, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 871.9760131835938, "completions/mean_terminated_length": 534.0696411132812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 13.382507288629737, "grad_norm": 0.21995221078395844, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 781714763.0, "reward": 0.7064732313156128, "reward_std": 0.11532038450241089, "rewards/simpleverify_reward/mean": 0.7064732313156128, "rewards/simpleverify_reward/std": 0.4554412066936493, "step": 1393 }, { "clip_ratio/high_max": 0.0015890078502707183, "clip_ratio/high_mean": 0.0006059181087039178, "clip_ratio/low_mean": 0.0003575073167212395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009634254274715204, "epoch": 13.391836734693877, "grad_norm": 1.3022396564483643, "learning_rate": 1e-06, "loss": -0.0444, "step": 1394 }, { "clip_ratio/high_max": 0.0019438958261162043, "clip_ratio/high_mean": 0.0006120850612205686, "clip_ratio/low_mean": 0.0003181603703978908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009302454491262324, "epoch": 13.401166180758018, "grad_norm": 0.2869971692562103, "learning_rate": 1e-06, "loss": -0.0438, "step": 1395 }, { "clip_ratio/high_max": 0.0017555804843141232, "clip_ratio/high_mean": 0.000632987120297912, "clip_ratio/low_mean": 0.00039248872690222925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010254758399241837, "epoch": 13.410495626822158, "grad_norm": 0.2125430703163147, "learning_rate": 1e-06, "loss": -0.0172, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1169084821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3623.0, "completions/mean_length": 968.74365234375, "completions/mean_terminated_length": 554.740234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 13.419825072886297, "grad_norm": 0.21314945816993713, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 783843124.0, "reward": 0.6414620876312256, "reward_std": 0.13355320692062378, "rewards/simpleverify_reward/mean": 0.6414620280265808, "rewards/simpleverify_reward/std": 0.47963806986808777, "step": 1397 }, { "clip_ratio/high_max": 0.002262541587697342, "clip_ratio/high_mean": 0.000737805296012084, "clip_ratio/low_mean": 0.0003051896524084441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010429949579702225, "epoch": 13.429154518950437, "grad_norm": 0.27586984634399414, "learning_rate": 1e-06, "loss": -0.0644, "step": 1398 }, { "clip_ratio/high_max": 0.001584132645803038, "clip_ratio/high_mean": 0.0006252572584344307, "clip_ratio/low_mean": 0.0002849069892363332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009101642535824794, "epoch": 13.438483965014576, "grad_norm": 0.19062864780426025, "learning_rate": 1e-06, "loss": -0.0644, "step": 1399 }, { "clip_ratio/high_max": 0.0015464688658539671, "clip_ratio/high_mean": 0.0006244301021069987, "clip_ratio/low_mean": 0.0004627149519365048, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010871450576814823, "epoch": 13.447813411078718, "grad_norm": 0.21767592430114746, "learning_rate": 1e-06, "loss": -0.0327, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0990513392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 922.9855346679688, "completions/mean_terminated_length": 574.1405639648438, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 13.457142857142857, "grad_norm": 0.29196789860725403, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 786058632.0, "reward": 0.6791294813156128, "reward_std": 0.12557834386825562, "rewards/simpleverify_reward/mean": 0.6791294813156128, "rewards/simpleverify_reward/std": 0.4668762683868408, "step": 1401 }, { "clip_ratio/high_max": 0.00198634931439301, "clip_ratio/high_mean": 0.000669236169414944, "clip_ratio/low_mean": 0.0003749031311599538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010441392805660143, "epoch": 13.466472303206997, "grad_norm": 0.2207353562116623, "learning_rate": 1e-06, "loss": -0.0303, "step": 1402 }, { "clip_ratio/high_max": 0.002032542623055633, "clip_ratio/high_mean": 0.0007000008536124369, "clip_ratio/low_mean": 0.00043820115524795256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011382020129531156, "epoch": 13.475801749271136, "grad_norm": 0.2764502763748169, "learning_rate": 1e-06, "loss": -0.056, "step": 1403 }, { "clip_ratio/high_max": 0.00228369146498153, "clip_ratio/high_mean": 0.0007882878198870458, "clip_ratio/low_mean": 0.00044291411904850975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001231201931659598, "epoch": 13.485131195335278, "grad_norm": 0.6986338496208191, "learning_rate": 1e-06, "loss": -0.0545, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1135602678571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 951.1389770507812, "completions/mean_terminated_length": 548.2562255859375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.494460641399417, "grad_norm": 0.24562038481235504, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 788165282.0, "reward": 0.6679688096046448, "reward_std": 0.12077514082193375, "rewards/simpleverify_reward/mean": 0.66796875, "rewards/simpleverify_reward/std": 0.4710078537464142, "step": 1405 }, { "clip_ratio/high_max": 0.0015636698735761456, "clip_ratio/high_mean": 0.0005458117529997253, "clip_ratio/low_mean": 0.00042507604075581185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000970887780567864, "epoch": 13.503790087463557, "grad_norm": 0.22943080961704254, "learning_rate": 1e-06, "loss": -0.0197, "step": 1406 }, { "clip_ratio/high_max": 0.0015982425302354386, "clip_ratio/high_mean": 0.0006434280148823746, "clip_ratio/low_mean": 0.00037332240208343137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00101675040423288, "epoch": 13.513119533527696, "grad_norm": 0.20886445045471191, "learning_rate": 1e-06, "loss": -0.1025, "step": 1407 }, { "clip_ratio/high_max": 0.001839226773881819, "clip_ratio/high_mean": 0.0006968725383558194, "clip_ratio/low_mean": 0.00043970778915536357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011365803657099605, "epoch": 13.522448979591836, "grad_norm": 0.23943829536437988, "learning_rate": 1e-06, "loss": -0.0373, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0968191964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 889.9342041015625, "completions/mean_terminated_length": 546.250244140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.531778425655977, "grad_norm": 0.23516997694969177, "learning_rate": 1e-06, "loss": -0.0343, "num_tokens": 790284926.0, "reward": 0.6875000596046448, "reward_std": 0.13573116064071655, "rewards/simpleverify_reward/mean": 0.6875, "rewards/simpleverify_reward/std": 0.4635770916938782, "step": 1409 }, { "clip_ratio/high_max": 0.001956761952897068, "clip_ratio/high_mean": 0.0007247169351103366, "clip_ratio/low_mean": 0.0002972419965772133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010219589166808873, "epoch": 13.541107871720117, "grad_norm": 0.2601413130760193, "learning_rate": 1e-06, "loss": -0.0786, "step": 1410 }, { "clip_ratio/high_max": 0.0013954223904875107, "clip_ratio/high_mean": 0.0006066164451112854, "clip_ratio/low_mean": 0.0004460126683625276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010526291371206753, "epoch": 13.550437317784256, "grad_norm": 0.2888765335083008, "learning_rate": 1e-06, "loss": -0.0404, "step": 1411 }, { "clip_ratio/high_max": 0.0018363310300628655, "clip_ratio/high_mean": 0.0008135862353810808, "clip_ratio/low_mean": 0.0003581803766792291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011717666311596986, "epoch": 13.559766763848396, "grad_norm": 0.28833556175231934, "learning_rate": 1e-06, "loss": -0.0658, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1068638392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3541.0, "completions/mean_length": 924.638427734375, "completions/mean_terminated_length": 545.1846313476562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.569096209912537, "grad_norm": 0.27648115158081055, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 792389326.0, "reward": 0.6788504719734192, "reward_std": 0.139299213886261, "rewards/simpleverify_reward/mean": 0.6788504719734192, "rewards/simpleverify_reward/std": 0.46698322892189026, "step": 1413 }, { "clip_ratio/high_max": 0.0021280740947986487, "clip_ratio/high_mean": 0.0007552420102001633, "clip_ratio/low_mean": 0.000391119989217259, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011463619848655071, "epoch": 13.578425655976677, "grad_norm": 0.2779802978038788, "learning_rate": 1e-06, "loss": -0.026, "step": 1414 }, { "clip_ratio/high_max": 0.001904585791635327, "clip_ratio/high_mean": 0.0007882241679908475, "clip_ratio/low_mean": 0.00038184797722351504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011700721661327407, "epoch": 13.587755102040816, "grad_norm": 0.6845740079879761, "learning_rate": 1e-06, "loss": -0.0577, "step": 1415 }, { "clip_ratio/high_max": 0.002082157625409309, "clip_ratio/high_mean": 0.0008207944920286536, "clip_ratio/low_mean": 0.0004764072164107347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012972017393622082, "epoch": 13.597084548104956, "grad_norm": 0.3065333664417267, "learning_rate": 1e-06, "loss": -0.0549, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1155133928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 982.9656982421875, "completions/mean_terminated_length": 576.4053344726562, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 13.606413994169095, "grad_norm": 0.2265787124633789, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 794584331.0, "reward": 0.6590402126312256, "reward_std": 0.1379466950893402, "rewards/simpleverify_reward/mean": 0.6590401530265808, "rewards/simpleverify_reward/std": 0.47409799695014954, "step": 1417 }, { "clip_ratio/high_max": 0.0019543363378033973, "clip_ratio/high_mean": 0.0007855215208110167, "clip_ratio/low_mean": 0.0003952117251628806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011807332739408594, "epoch": 13.615743440233237, "grad_norm": 0.4911338686943054, "learning_rate": 1e-06, "loss": -0.0517, "step": 1418 }, { "clip_ratio/high_max": 0.0018853165311156772, "clip_ratio/high_mean": 0.0007034505670162616, "clip_ratio/low_mean": 0.0005340785805856285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012375291516946163, "epoch": 13.625072886297376, "grad_norm": 0.328219473361969, "learning_rate": 1e-06, "loss": 0.0028, "step": 1419 }, { "clip_ratio/high_max": 0.0022736557803000323, "clip_ratio/high_mean": 0.000850781550980173, "clip_ratio/low_mean": 0.0002688177528398228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001119599284720607, "epoch": 13.634402332361516, "grad_norm": 0.3365557789802551, "learning_rate": 1e-06, "loss": -0.1137, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 979.5851440429688, "completions/mean_terminated_length": 581.4528198242188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 13.643731778425655, "grad_norm": 0.2708491086959839, "learning_rate": 1e-06, "loss": -0.0854, "num_tokens": 796786292.0, "reward": 0.6771763563156128, "reward_std": 0.14764057099819183, "rewards/simpleverify_reward/mean": 0.6771763563156128, "rewards/simpleverify_reward/std": 0.4676211476325989, "step": 1421 }, { "clip_ratio/high_max": 0.0019801655944320373, "clip_ratio/high_mean": 0.0007584871127619408, "clip_ratio/low_mean": 0.00034046183554892195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010989489419443998, "epoch": 13.653061224489797, "grad_norm": 0.2272440791130066, "learning_rate": 1e-06, "loss": -0.0332, "step": 1422 }, { "clip_ratio/high_max": 0.0016970639080682304, "clip_ratio/high_mean": 0.0007948412576297414, "clip_ratio/low_mean": 0.0003356736449404707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011305148982501123, "epoch": 13.662390670553936, "grad_norm": 0.21907949447631836, "learning_rate": 1e-06, "loss": -0.0761, "step": 1423 }, { "clip_ratio/high_max": 0.0017670050110609736, "clip_ratio/high_mean": 0.0007654417622688925, "clip_ratio/low_mean": 0.0004816700320589007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012471118025132455, "epoch": 13.671720116618076, "grad_norm": 0.2306557595729828, "learning_rate": 1e-06, "loss": -0.0229, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0984933035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 901.947021484375, "completions/mean_terminated_length": 552.983642578125, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 13.681049562682215, "grad_norm": 0.291649729013443, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 798916462.0, "reward": 0.6810826063156128, "reward_std": 0.12313438951969147, "rewards/simpleverify_reward/mean": 0.6810826063156128, "rewards/simpleverify_reward/std": 0.46612197160720825, "step": 1425 }, { "clip_ratio/high_max": 0.0018193289361079223, "clip_ratio/high_mean": 0.0006051261743778014, "clip_ratio/low_mean": 0.0003238508876393098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009289770714531187, "epoch": 13.690379008746355, "grad_norm": 0.33481365442276, "learning_rate": 1e-06, "loss": -0.0703, "step": 1426 }, { "clip_ratio/high_max": 0.0018550099885032978, "clip_ratio/high_mean": 0.0006465746455432964, "clip_ratio/low_mean": 0.00041338193477713503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010599565976008307, "epoch": 13.699708454810496, "grad_norm": 0.27453917264938354, "learning_rate": 1e-06, "loss": -0.0273, "step": 1427 }, { "clip_ratio/high_max": 0.0018198799079982564, "clip_ratio/high_mean": 0.0006246040029509459, "clip_ratio/low_mean": 0.0004575756574922707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010821796495292801, "epoch": 13.709037900874636, "grad_norm": 0.23001761734485626, "learning_rate": 1e-06, "loss": -0.0203, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1143973214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 967.4395141601562, "completions/mean_terminated_length": 563.30908203125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 13.718367346938775, "grad_norm": 0.3058474063873291, "learning_rate": 1e-06, "loss": -0.0547, "num_tokens": 801064581.0, "reward": 0.6715959906578064, "reward_std": 0.14959684014320374, "rewards/simpleverify_reward/mean": 0.6715959906578064, "rewards/simpleverify_reward/std": 0.4696981608867645, "step": 1429 }, { "clip_ratio/high_max": 0.0023121967678889632, "clip_ratio/high_mean": 0.0008226119789469521, "clip_ratio/low_mean": 0.0003072344395604887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001129846423282288, "epoch": 13.727696793002915, "grad_norm": 0.23627729713916779, "learning_rate": 1e-06, "loss": -0.1011, "step": 1430 }, { "clip_ratio/high_max": 0.0021394586328824516, "clip_ratio/high_mean": 0.0008401936938753352, "clip_ratio/low_mean": 0.00041068469067795377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001250878380233189, "epoch": 13.737026239067056, "grad_norm": 0.40490540862083435, "learning_rate": 1e-06, "loss": -0.0688, "step": 1431 }, { "clip_ratio/high_max": 0.002031424442975549, "clip_ratio/high_mean": 0.0007917287275631679, "clip_ratio/low_mean": 0.00031921634399623144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001110945060645463, "epoch": 13.746355685131196, "grad_norm": 0.3457842171192169, "learning_rate": 1e-06, "loss": -0.0629, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 1027.6705322265625, "completions/mean_terminated_length": 580.3692626953125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.755685131195335, "grad_norm": 0.22895513474941254, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 803254360.0, "reward": 0.6400669813156128, "reward_std": 0.13554410636425018, "rewards/simpleverify_reward/mean": 0.6400669813156128, "rewards/simpleverify_reward/std": 0.48004743456840515, "step": 1433 }, { "clip_ratio/high_max": 0.0016798612268758006, "clip_ratio/high_mean": 0.0006310529734037118, "clip_ratio/low_mean": 0.00033670871789581724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009677617072156863, "epoch": 13.765014577259475, "grad_norm": 0.1850878745317459, "learning_rate": 1e-06, "loss": -0.0565, "step": 1434 }, { "clip_ratio/high_max": 0.0018957941829285119, "clip_ratio/high_mean": 0.0006431912161133368, "clip_ratio/low_mean": 0.0003849965960398549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010281878167006653, "epoch": 13.774344023323614, "grad_norm": 0.21771684288978577, "learning_rate": 1e-06, "loss": -0.0693, "step": 1435 }, { "clip_ratio/high_max": 0.001802396302082343, "clip_ratio/high_mean": 0.0006520615838780941, "clip_ratio/low_mean": 0.0003981012778240256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010501628403289942, "epoch": 13.783673469387756, "grad_norm": 0.2551950216293335, "learning_rate": 1e-06, "loss": -0.0338, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1263950892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 1003.05615234375, "completions/mean_terminated_length": 555.5621337890625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 13.793002915451895, "grad_norm": 0.24202559888362885, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 805351641.0, "reward": 0.662667453289032, "reward_std": 0.13207490742206573, "rewards/simpleverify_reward/mean": 0.6626673936843872, "rewards/simpleverify_reward/std": 0.47286540269851685, "step": 1437 }, { "clip_ratio/high_max": 0.002065873697574716, "clip_ratio/high_mean": 0.0007704324270889629, "clip_ratio/low_mean": 0.00031157137186710315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010820038041856606, "epoch": 13.802332361516035, "grad_norm": 0.9785948395729065, "learning_rate": 1e-06, "loss": -0.0791, "step": 1438 }, { "clip_ratio/high_max": 0.0023145115992520005, "clip_ratio/high_mean": 0.0007731543901172699, "clip_ratio/low_mean": 0.0003798685597757867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011530229385243729, "epoch": 13.811661807580174, "grad_norm": 0.23611028492450714, "learning_rate": 1e-06, "loss": -0.0634, "step": 1439 }, { "clip_ratio/high_max": 0.0022859665696159936, "clip_ratio/high_mean": 0.00083440747584973, "clip_ratio/low_mean": 0.000507960440700117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013423678938124795, "epoch": 13.820991253644316, "grad_norm": 0.2615151107311249, "learning_rate": 1e-06, "loss": -0.0693, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1336495535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 1035.1839599609375, "completions/mean_terminated_length": 563.0, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.830320699708455, "grad_norm": 0.2757267653942108, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 807459836.0, "reward": 0.650948703289032, "reward_std": 0.13250139355659485, "rewards/simpleverify_reward/mean": 0.6509486436843872, "rewards/simpleverify_reward/std": 0.47673672437667847, "step": 1441 }, { "clip_ratio/high_max": 0.001878063332696911, "clip_ratio/high_mean": 0.0005800124972665799, "clip_ratio/low_mean": 0.00047960027723092935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010596127622193308, "epoch": 13.839650145772595, "grad_norm": 0.29817602038383484, "learning_rate": 1e-06, "loss": -0.0441, "step": 1442 }, { "clip_ratio/high_max": 0.0017079910576285329, "clip_ratio/high_mean": 0.0005452491714095231, "clip_ratio/low_mean": 0.000535171183400962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010804203666339163, "epoch": 13.848979591836734, "grad_norm": 0.18822871148586273, "learning_rate": 1e-06, "loss": -0.0355, "step": 1443 }, { "clip_ratio/high_max": 0.0018416862039885018, "clip_ratio/high_mean": 0.0006813225445512217, "clip_ratio/low_mean": 0.0005924913593844394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012738138939312194, "epoch": 13.858309037900874, "grad_norm": 0.30579936504364014, "learning_rate": 1e-06, "loss": -0.0412, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1196986607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3130.0, "completions/mean_length": 968.8599853515625, "completions/mean_terminated_length": 543.648193359375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.867638483965015, "grad_norm": 0.3076058626174927, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 809522294.0, "reward": 0.664620578289032, "reward_std": 0.13296285271644592, "rewards/simpleverify_reward/mean": 0.6646205186843872, "rewards/simpleverify_reward/std": 0.472188800573349, "step": 1445 }, { "clip_ratio/high_max": 0.0017073651470127515, "clip_ratio/high_mean": 0.0006574979433935368, "clip_ratio/low_mean": 0.00041659821135908714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010740961697592866, "epoch": 13.876967930029155, "grad_norm": 0.240012064576149, "learning_rate": 1e-06, "loss": -0.0348, "step": 1446 }, { "clip_ratio/high_max": 0.0017013302131090313, "clip_ratio/high_mean": 0.0006849740748293698, "clip_ratio/low_mean": 0.00034861694985011127, "clip_ratio/low_min": 2.8639071388170123e-05, "clip_ratio/region_mean": 0.0010335910083085764, "epoch": 13.886297376093294, "grad_norm": 0.3220127820968628, "learning_rate": 1e-06, "loss": -0.0303, "step": 1447 }, { "clip_ratio/high_max": 0.0021387139058788307, "clip_ratio/high_mean": 0.0007995177347766003, "clip_ratio/low_mean": 0.0003607101325542317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00116022788824921, "epoch": 13.895626822157434, "grad_norm": 0.19762931764125824, "learning_rate": 1e-06, "loss": -0.0843, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 983.7196044921875, "completions/mean_terminated_length": 543.63916015625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 13.904956268221575, "grad_norm": 0.24133624136447906, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 811577617.0, "reward": 0.6679688096046448, "reward_std": 0.13137298822402954, "rewards/simpleverify_reward/mean": 0.66796875, "rewards/simpleverify_reward/std": 0.4710078537464142, "step": 1449 }, { "clip_ratio/high_max": 0.001812853648516466, "clip_ratio/high_mean": 0.0006291266313382948, "clip_ratio/low_mean": 0.00030826371630610083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009373903430969222, "epoch": 13.914285714285715, "grad_norm": 0.18584126234054565, "learning_rate": 1e-06, "loss": -0.0846, "step": 1450 }, { "clip_ratio/high_max": 0.0017031242605298758, "clip_ratio/high_mean": 0.0006229063919818145, "clip_ratio/low_mean": 0.00025761251663425355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008805189027043525, "epoch": 13.923615160349854, "grad_norm": 0.1733618825674057, "learning_rate": 1e-06, "loss": -0.0719, "step": 1451 }, { "clip_ratio/high_max": 0.0021805533760925755, "clip_ratio/high_mean": 0.0007481766551791225, "clip_ratio/low_mean": 0.00042493661430853535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173113276308868, "epoch": 13.932944606413994, "grad_norm": 1.1170430183410645, "learning_rate": 1e-06, "loss": -0.0573, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.123046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 998.0820922851562, "completions/mean_terminated_length": 563.4075927734375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.942274052478133, "grad_norm": 0.2578044533729553, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 813702231.0, "reward": 0.635323703289032, "reward_std": 0.12600919604301453, "rewards/simpleverify_reward/mean": 0.6353236436843872, "rewards/simpleverify_reward/std": 0.48140645027160645, "step": 1453 }, { "clip_ratio/high_max": 0.0017034691918524913, "clip_ratio/high_mean": 0.000568199977351469, "clip_ratio/low_mean": 0.00035777241237155977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009259723919967655, "epoch": 13.951603498542275, "grad_norm": 0.21260511875152588, "learning_rate": 1e-06, "loss": -0.0493, "step": 1454 }, { "clip_ratio/high_max": 0.0015767288459755946, "clip_ratio/high_mean": 0.000537239650839183, "clip_ratio/low_mean": 0.0004069789774803212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009442186310479883, "epoch": 13.960932944606414, "grad_norm": 0.3167249262332916, "learning_rate": 1e-06, "loss": -0.018, "step": 1455 }, { "clip_ratio/high_max": 0.0019210395439586136, "clip_ratio/high_mean": 0.0005645713199555757, "clip_ratio/low_mean": 0.00039476963047491154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009593409595254343, "epoch": 13.970262390670554, "grad_norm": 0.1877274364233017, "learning_rate": 1e-06, "loss": -0.0333, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 996.2463989257812, "completions/mean_terminated_length": 553.4244384765625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.00932944606414, "grad_norm": 0.257035493850708, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 815801026.0, "reward": 0.6699219346046448, "reward_std": 0.12435755878686905, "rewards/simpleverify_reward/mean": 0.669921875, "rewards/simpleverify_reward/std": 0.4703065752983093, "step": 1457 }, { "clip_ratio/high_max": 0.0017598652266315185, "clip_ratio/high_mean": 0.0006823039229857386, "clip_ratio/low_mean": 0.00032885251403058646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001011156437016325, "epoch": 14.018658892128279, "grad_norm": 0.2623341977596283, "learning_rate": 1e-06, "loss": -0.0248, "step": 1458 }, { "clip_ratio/high_max": 0.0019798725443251897, "clip_ratio/high_mean": 0.000642903143670992, "clip_ratio/low_mean": 0.000434037452123448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001076940621715039, "epoch": 14.02798833819242, "grad_norm": 0.9761454463005066, "learning_rate": 1e-06, "loss": -0.0483, "step": 1459 }, { "clip_ratio/high_max": 0.0021686724285245873, "clip_ratio/high_mean": 0.000739072773285443, "clip_ratio/low_mean": 0.00042419955525474506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001163272372650681, "epoch": 14.03731778425656, "grad_norm": 0.269163578748703, "learning_rate": 1e-06, "loss": -0.0535, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 951.8153076171875, "completions/mean_terminated_length": 565.6873168945312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.0466472303207, "grad_norm": 0.28160279989242554, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 817964148.0, "reward": 0.6738281846046448, "reward_std": 0.1335342526435852, "rewards/simpleverify_reward/mean": 0.673828125, "rewards/simpleverify_reward/std": 0.468876451253891, "step": 1461 }, { "clip_ratio/high_max": 0.0016882374802662525, "clip_ratio/high_mean": 0.0005531436686396773, "clip_ratio/low_mean": 0.00037597506434394745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009291187452618033, "epoch": 14.055976676384839, "grad_norm": 1.1307976245880127, "learning_rate": 1e-06, "loss": -0.0463, "step": 1462 }, { "clip_ratio/high_max": 0.00162481706138351, "clip_ratio/high_mean": 0.0006772424176233471, "clip_ratio/low_mean": 0.0003719575772720418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010492000001249835, "epoch": 14.06530612244898, "grad_norm": 0.23051241040229797, "learning_rate": 1e-06, "loss": -0.0752, "step": 1463 }, { "clip_ratio/high_max": 0.0014106218186498154, "clip_ratio/high_mean": 0.0006018304502504179, "clip_ratio/low_mean": 0.0005260913858364802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011279218342679087, "epoch": 14.07463556851312, "grad_norm": 2.616154193878174, "learning_rate": 1e-06, "loss": -0.0328, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 977.8926391601562, "completions/mean_terminated_length": 550.5377197265625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 14.08396501457726, "grad_norm": 0.28305238485336304, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 820049187.0, "reward": 0.7025669813156128, "reward_std": 0.13433948159217834, "rewards/simpleverify_reward/mean": 0.7025669813156128, "rewards/simpleverify_reward/std": 0.45719248056411743, "step": 1465 }, { "clip_ratio/high_max": 0.0017752682579157408, "clip_ratio/high_mean": 0.0006087112396926386, "clip_ratio/low_mean": 0.0003916982323062257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001000409463813412, "epoch": 14.093294460641399, "grad_norm": 0.3709585964679718, "learning_rate": 1e-06, "loss": -0.0775, "step": 1466 }, { "clip_ratio/high_max": 0.0017649307010287885, "clip_ratio/high_mean": 0.0007175974023994058, "clip_ratio/low_mean": 0.00033425516573970526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010518525650695665, "epoch": 14.102623906705539, "grad_norm": 0.19802385568618774, "learning_rate": 1e-06, "loss": -0.0843, "step": 1467 }, { "clip_ratio/high_max": 0.0016728815135138575, "clip_ratio/high_mean": 0.0006415759089577477, "clip_ratio/low_mean": 0.0005372895957407309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011788654992415104, "epoch": 14.11195335276968, "grad_norm": 0.5499393343925476, "learning_rate": 1e-06, "loss": -0.0479, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1196986607142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3488.0, "completions/mean_length": 974.0059204101562, "completions/mean_terminated_length": 549.4938354492188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 14.12128279883382, "grad_norm": 0.21205177903175354, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 822134728.0, "reward": 0.6766183376312256, "reward_std": 0.1248067170381546, "rewards/simpleverify_reward/mean": 0.6766182780265808, "rewards/simpleverify_reward/std": 0.4678322672843933, "step": 1469 }, { "clip_ratio/high_max": 0.0017665832638158463, "clip_ratio/high_mean": 0.0007070381961966632, "clip_ratio/low_mean": 0.000414185143654322, "clip_ratio/low_min": 1.4282450138125569e-05, "clip_ratio/region_mean": 0.0011212233475816902, "epoch": 14.130612244897959, "grad_norm": 0.25693151354789734, "learning_rate": 1e-06, "loss": -0.0525, "step": 1470 }, { "clip_ratio/high_max": 0.0016675422120897565, "clip_ratio/high_mean": 0.0006134913382993545, "clip_ratio/low_mean": 0.0002912058904485093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009046972263604403, "epoch": 14.139941690962099, "grad_norm": 0.19619427621364594, "learning_rate": 1e-06, "loss": -0.0738, "step": 1471 }, { "clip_ratio/high_max": 0.0021431104651128408, "clip_ratio/high_mean": 0.0007379040180239826, "clip_ratio/low_mean": 0.0003733983978690958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011113024156657048, "epoch": 14.14927113702624, "grad_norm": 0.21259957551956177, "learning_rate": 1e-06, "loss": -0.0667, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3718.0, "completions/mean_length": 1015.0103759765625, "completions/mean_terminated_length": 552.2686157226562, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.15860058309038, "grad_norm": 0.28357866406440735, "learning_rate": 1e-06, "loss": -0.1063, "num_tokens": 824208933.0, "reward": 0.6819196939468384, "reward_std": 0.12599878013134003, "rewards/simpleverify_reward/mean": 0.6819196343421936, "rewards/simpleverify_reward/std": 0.4657958447933197, "step": 1473 }, { "clip_ratio/high_max": 0.002120259654475376, "clip_ratio/high_mean": 0.0007779968163958983, "clip_ratio/low_mean": 0.00033594223327781947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011139390626340173, "epoch": 14.167930029154519, "grad_norm": 0.3633739948272705, "learning_rate": 1e-06, "loss": -0.0551, "step": 1474 }, { "clip_ratio/high_max": 0.0016150159135577269, "clip_ratio/high_mean": 0.0005898562139918795, "clip_ratio/low_mean": 0.0003023923711680254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008922485831135418, "epoch": 14.177259475218658, "grad_norm": 0.26135361194610596, "learning_rate": 1e-06, "loss": -0.0698, "step": 1475 }, { "clip_ratio/high_max": 0.001818940851080697, "clip_ratio/high_mean": 0.0005925038203713484, "clip_ratio/low_mean": 0.00030920192557459814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009017057418532204, "epoch": 14.186588921282798, "grad_norm": 1.2202081680297852, "learning_rate": 1e-06, "loss": -0.0427, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1336495535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 996.919677734375, "completions/mean_terminated_length": 518.8328247070312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 14.19591836734694, "grad_norm": 0.24754464626312256, "learning_rate": 1e-06, "loss": -0.1153, "num_tokens": 826169085.0, "reward": 0.6827567219734192, "reward_std": 0.13351550698280334, "rewards/simpleverify_reward/mean": 0.6827567219734192, "rewards/simpleverify_reward/std": 0.4654679596424103, "step": 1477 }, { "clip_ratio/high_max": 0.002019547660893295, "clip_ratio/high_mean": 0.000756361490857671, "clip_ratio/low_mean": 0.00027024893279303797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010266104382026242, "epoch": 14.205247813411079, "grad_norm": 0.2665383815765381, "learning_rate": 1e-06, "loss": -0.1055, "step": 1478 }, { "clip_ratio/high_max": 0.0019122425364912488, "clip_ratio/high_mean": 0.0006724672148266109, "clip_ratio/low_mean": 0.00041097269513556967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010834399217856117, "epoch": 14.214577259475218, "grad_norm": 0.498045414686203, "learning_rate": 1e-06, "loss": -0.0932, "step": 1479 }, { "clip_ratio/high_max": 0.002102595775795635, "clip_ratio/high_mean": 0.0007243249410748831, "clip_ratio/low_mean": 0.0005672595070791431, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012915844308736268, "epoch": 14.223906705539358, "grad_norm": 0.26925981044769287, "learning_rate": 1e-06, "loss": -0.0366, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1258370535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 1001.5549926757812, "completions/mean_terminated_length": 556.1050415039062, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 14.2332361516035, "grad_norm": 0.25094231963157654, "learning_rate": 1e-06, "loss": -0.0617, "num_tokens": 828279234.0, "reward": 0.6629464626312256, "reward_std": 0.1241048127412796, "rewards/simpleverify_reward/mean": 0.6629464030265808, "rewards/simpleverify_reward/std": 0.4727693498134613, "step": 1481 }, { "clip_ratio/high_max": 0.0017184223579533864, "clip_ratio/high_mean": 0.0006355511859510443, "clip_ratio/low_mean": 0.0003537010202308011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009892521975416457, "epoch": 14.242565597667639, "grad_norm": 0.31553739309310913, "learning_rate": 1e-06, "loss": -0.0581, "step": 1482 }, { "clip_ratio/high_max": 0.001699879521765979, "clip_ratio/high_mean": 0.0006396244848474453, "clip_ratio/low_mean": 0.00043134020984325616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010709646721807076, "epoch": 14.251895043731778, "grad_norm": 0.25315365195274353, "learning_rate": 1e-06, "loss": -0.0598, "step": 1483 }, { "clip_ratio/high_max": 0.001610553847058327, "clip_ratio/high_mean": 0.00048447225958625495, "clip_ratio/low_mean": 0.00046616515692221583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00095063742264756, "epoch": 14.261224489795918, "grad_norm": 0.24537959694862366, "learning_rate": 1e-06, "loss": -0.0353, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1319754464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 1031.6409912109375, "completions/mean_terminated_length": 565.7322387695312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.270553935860057, "grad_norm": 0.3242650032043457, "learning_rate": 1e-06, "loss": -0.0969, "num_tokens": 830383043.0, "reward": 0.6400669813156128, "reward_std": 0.13541145622730255, "rewards/simpleverify_reward/mean": 0.6400669813156128, "rewards/simpleverify_reward/std": 0.48004743456840515, "step": 1485 }, { "clip_ratio/high_max": 0.0016808089349069633, "clip_ratio/high_mean": 0.0006677212077192962, "clip_ratio/low_mean": 0.00043472720699355705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011024484410881996, "epoch": 14.279883381924199, "grad_norm": 0.24289438128471375, "learning_rate": 1e-06, "loss": -0.0553, "step": 1486 }, { "clip_ratio/high_max": 0.001927305493154563, "clip_ratio/high_mean": 0.0006770734744350193, "clip_ratio/low_mean": 0.0004024712370664929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010795447051350493, "epoch": 14.289212827988338, "grad_norm": 0.24343040585517883, "learning_rate": 1e-06, "loss": -0.0758, "step": 1487 }, { "clip_ratio/high_max": 0.002105100793414749, "clip_ratio/high_mean": 0.0006901806827954715, "clip_ratio/low_mean": 0.0005438578155008145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012340384964772966, "epoch": 14.298542274052478, "grad_norm": 0.2709929347038269, "learning_rate": 1e-06, "loss": -0.0208, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1286272321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 1015.6886596679688, "completions/mean_terminated_length": 560.9901123046875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.307871720116617, "grad_norm": 0.26682010293006897, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 832484271.0, "reward": 0.6495535969734192, "reward_std": 0.1388680785894394, "rewards/simpleverify_reward/mean": 0.6495535969734192, "rewards/simpleverify_reward/std": 0.47717633843421936, "step": 1489 }, { "clip_ratio/high_max": 0.002045384913799353, "clip_ratio/high_mean": 0.0007090457174854237, "clip_ratio/low_mean": 0.00038803986899438314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010970855819323333, "epoch": 14.317201166180759, "grad_norm": 0.23076650500297546, "learning_rate": 1e-06, "loss": -0.0877, "step": 1490 }, { "clip_ratio/high_max": 0.002157004884793423, "clip_ratio/high_mean": 0.0008168325966835255, "clip_ratio/low_mean": 0.0004142135694564786, "clip_ratio/low_min": 2.7914247766602784e-05, "clip_ratio/region_mean": 0.0012310461897868663, "epoch": 14.326530612244898, "grad_norm": 0.23570062220096588, "learning_rate": 1e-06, "loss": -0.0297, "step": 1491 }, { "clip_ratio/high_max": 0.001987045343412319, "clip_ratio/high_mean": 0.0007558735596830957, "clip_ratio/low_mean": 0.0005221279166107706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012780014931195183, "epoch": 14.335860058309038, "grad_norm": 0.31539222598075867, "learning_rate": 1e-06, "loss": -0.0632, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1266741071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 1015.9874877929688, "completions/mean_terminated_length": 569.238037109375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 14.345189504373177, "grad_norm": 0.25717034935951233, "learning_rate": 1e-06, "loss": -0.0319, "num_tokens": 834619722.0, "reward": 0.6498326063156128, "reward_std": 0.1472674459218979, "rewards/simpleverify_reward/mean": 0.6498326063156128, "rewards/simpleverify_reward/std": 0.4770887792110443, "step": 1493 }, { "clip_ratio/high_max": 0.002081916405586526, "clip_ratio/high_mean": 0.0008212497468775837, "clip_ratio/low_mean": 0.0003888551334512158, "clip_ratio/low_min": 1.5939811419229954e-05, "clip_ratio/region_mean": 0.001210104896017583, "epoch": 14.354518950437317, "grad_norm": 0.8526971340179443, "learning_rate": 1e-06, "loss": -0.0605, "step": 1494 }, { "clip_ratio/high_max": 0.001983191290491959, "clip_ratio/high_mean": 0.0007649265262443805, "clip_ratio/low_mean": 0.00039638816338083416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011613146780291572, "epoch": 14.363848396501458, "grad_norm": 0.2302425354719162, "learning_rate": 1e-06, "loss": -0.0936, "step": 1495 }, { "clip_ratio/high_max": 0.0021858961044927128, "clip_ratio/high_mean": 0.0007818802532710833, "clip_ratio/low_mean": 0.0003954052526751184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011772855250455905, "epoch": 14.373177842565598, "grad_norm": 0.26615142822265625, "learning_rate": 1e-06, "loss": -0.0745, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1018415178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 926.65380859375, "completions/mean_terminated_length": 567.283935546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.382507288629737, "grad_norm": 0.25878140330314636, "learning_rate": 1e-06, "loss": -0.0186, "num_tokens": 836788617.0, "reward": 0.6721540689468384, "reward_std": 0.1424025446176529, "rewards/simpleverify_reward/mean": 0.6721540093421936, "rewards/simpleverify_reward/std": 0.4694938659667969, "step": 1497 }, { "clip_ratio/high_max": 0.0017198733039549552, "clip_ratio/high_mean": 0.0006875689778098604, "clip_ratio/low_mean": 0.00037621458363901183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010637835730449297, "epoch": 14.391836734693877, "grad_norm": 0.2273779958486557, "learning_rate": 1e-06, "loss": -0.0726, "step": 1498 }, { "clip_ratio/high_max": 0.0019644709027488716, "clip_ratio/high_mean": 0.0007321423836401664, "clip_ratio/low_mean": 0.0004133280544920126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001145470456322073, "epoch": 14.401166180758018, "grad_norm": 0.25907737016677856, "learning_rate": 1e-06, "loss": -0.048, "step": 1499 }, { "clip_ratio/high_max": 0.002024045243160799, "clip_ratio/high_mean": 0.0008353213361260714, "clip_ratio/low_mean": 0.0003882976641307323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012236189977556933, "epoch": 14.410495626822158, "grad_norm": 0.2595151662826538, "learning_rate": 1e-06, "loss": -0.0397, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1319754464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 1034.146484375, "completions/mean_terminated_length": 568.6187744140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.419825072886297, "grad_norm": 0.23181487619876862, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 838913326.0, "reward": 0.6682478189468384, "reward_std": 0.13257834315299988, "rewards/simpleverify_reward/mean": 0.6682477593421936, "rewards/simpleverify_reward/std": 0.4709082245826721, "step": 1501 }, { "clip_ratio/high_max": 0.0018233435330330394, "clip_ratio/high_mean": 0.000648120363621274, "clip_ratio/low_mean": 0.00045311820463211916, "clip_ratio/low_min": 9.380159099237062e-06, "clip_ratio/region_mean": 0.0011012385875801556, "epoch": 14.429154518950437, "grad_norm": 0.22719138860702515, "learning_rate": 1e-06, "loss": -0.0185, "step": 1502 }, { "clip_ratio/high_max": 0.0020507172303041443, "clip_ratio/high_mean": 0.0007355928155448055, "clip_ratio/low_mean": 0.0003289355695414997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010645283909980208, "epoch": 14.438483965014576, "grad_norm": 0.3230571150779724, "learning_rate": 1e-06, "loss": -0.0547, "step": 1503 }, { "clip_ratio/high_max": 0.0018695968301472021, "clip_ratio/high_mean": 0.0007009762375673745, "clip_ratio/low_mean": 0.0003225858511086699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010235620848106919, "epoch": 14.447813411078718, "grad_norm": 0.21321989595890045, "learning_rate": 1e-06, "loss": -0.1024, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1174665178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 979.43701171875, "completions/mean_terminated_length": 564.6177978515625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.457142857142857, "grad_norm": 0.25736600160598755, "learning_rate": 1e-06, "loss": -0.0579, "num_tokens": 841076100.0, "reward": 0.652901828289032, "reward_std": 0.1293831169605255, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.4761137068271637, "step": 1505 }, { "clip_ratio/high_max": 0.001910074697661912, "clip_ratio/high_mean": 0.000697032037351164, "clip_ratio/low_mean": 0.00030718194921064423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010042140002042288, "epoch": 14.466472303206997, "grad_norm": 0.27086371183395386, "learning_rate": 1e-06, "loss": -0.0715, "step": 1506 }, { "clip_ratio/high_max": 0.0015885842658462934, "clip_ratio/high_mean": 0.0006171458608150715, "clip_ratio/low_mean": 0.0005170925269339932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011342383913870435, "epoch": 14.475801749271136, "grad_norm": 0.29060277342796326, "learning_rate": 1e-06, "loss": -0.0112, "step": 1507 }, { "clip_ratio/high_max": 0.00209447341694613, "clip_ratio/high_mean": 0.000757915024223621, "clip_ratio/low_mean": 0.00037829890698048985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001136213915742701, "epoch": 14.485131195335278, "grad_norm": 0.21735703945159912, "learning_rate": 1e-06, "loss": -0.0526, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1277901785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 999.8817138671875, "completions/mean_terminated_length": 546.259765625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 14.494460641399417, "grad_norm": 0.27782538533210754, "learning_rate": 1e-06, "loss": -0.0354, "num_tokens": 843141996.0, "reward": 0.6495535969734192, "reward_std": 0.13555462658405304, "rewards/simpleverify_reward/mean": 0.6495535969734192, "rewards/simpleverify_reward/std": 0.47717633843421936, "step": 1509 }, { "clip_ratio/high_max": 0.0017873397991934326, "clip_ratio/high_mean": 0.0007010036497376859, "clip_ratio/low_mean": 0.00037619423460455437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010771978668344673, "epoch": 14.503790087463557, "grad_norm": 0.21744512021541595, "learning_rate": 1e-06, "loss": -0.0609, "step": 1510 }, { "clip_ratio/high_max": 0.001728607625409495, "clip_ratio/high_mean": 0.000631923830951564, "clip_ratio/low_mean": 0.0005439759052023874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011758997607103083, "epoch": 14.513119533527696, "grad_norm": 0.48800164461135864, "learning_rate": 1e-06, "loss": -0.0157, "step": 1511 }, { "clip_ratio/high_max": 0.0021881069915252738, "clip_ratio/high_mean": 0.0008708532877790276, "clip_ratio/low_mean": 0.00047672934942966094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013475826490321197, "epoch": 14.522448979591836, "grad_norm": 0.2724487781524658, "learning_rate": 1e-06, "loss": -0.098, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1096540178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 940.001708984375, "completions/mean_terminated_length": 551.3124389648438, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.531778425655977, "grad_norm": 0.2955566346645355, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 845260962.0, "reward": 0.680245578289032, "reward_std": 0.12747327983379364, "rewards/simpleverify_reward/mean": 0.6802455186843872, "rewards/simpleverify_reward/std": 0.4664463996887207, "step": 1513 }, { "clip_ratio/high_max": 0.001928613073687302, "clip_ratio/high_mean": 0.0005987636914142058, "clip_ratio/low_mean": 0.00033929450319192256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009380581959703704, "epoch": 14.541107871720117, "grad_norm": 0.2214987725019455, "learning_rate": 1e-06, "loss": -0.0603, "step": 1514 }, { "clip_ratio/high_max": 0.001970871460798662, "clip_ratio/high_mean": 0.0007123254672478652, "clip_ratio/low_mean": 0.00046383863445953466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011761641144403256, "epoch": 14.550437317784256, "grad_norm": 0.32609906792640686, "learning_rate": 1e-06, "loss": -0.0418, "step": 1515 }, { "clip_ratio/high_max": 0.001763349435350392, "clip_ratio/high_mean": 0.0006283502016231068, "clip_ratio/low_mean": 0.0004096307702639024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010379809973528609, "epoch": 14.559766763848396, "grad_norm": 0.22447293996810913, "learning_rate": 1e-06, "loss": -0.057, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1241629464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 980.36865234375, "completions/mean_terminated_length": 538.6814575195312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 14.569096209912537, "grad_norm": 0.25725117325782776, "learning_rate": 1e-06, "loss": -0.0556, "num_tokens": 847311707.0, "reward": 0.6654576063156128, "reward_std": 0.13141179084777832, "rewards/simpleverify_reward/mean": 0.6654576063156128, "rewards/simpleverify_reward/std": 0.47189608216285706, "step": 1517 }, { "clip_ratio/high_max": 0.0019702778954524547, "clip_ratio/high_mean": 0.000729365170627716, "clip_ratio/low_mean": 0.00046982058074718225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011991857645625714, "epoch": 14.578425655976677, "grad_norm": 0.2858355641365051, "learning_rate": 1e-06, "loss": -0.0384, "step": 1518 }, { "clip_ratio/high_max": 0.0016076383290055674, "clip_ratio/high_mean": 0.0006901400010974612, "clip_ratio/low_mean": 0.0002581866428954527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009483266421739245, "epoch": 14.587755102040816, "grad_norm": 0.18827185034751892, "learning_rate": 1e-06, "loss": -0.0733, "step": 1519 }, { "clip_ratio/high_max": 0.0018601639385451563, "clip_ratio/high_mean": 0.0006837736827947083, "clip_ratio/low_mean": 0.0004342568627180299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011180305264133494, "epoch": 14.597084548104956, "grad_norm": 0.3857232332229614, "learning_rate": 1e-06, "loss": -0.0476, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1068638392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 924.685302734375, "completions/mean_terminated_length": 545.2371215820312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.606413994169095, "grad_norm": 0.2379305064678192, "learning_rate": 1e-06, "loss": -0.0697, "num_tokens": 849405195.0, "reward": 0.688058078289032, "reward_std": 0.12181064486503601, "rewards/simpleverify_reward/mean": 0.6880580186843872, "rewards/simpleverify_reward/std": 0.46335092186927795, "step": 1521 }, { "clip_ratio/high_max": 0.0016910591839405242, "clip_ratio/high_mean": 0.0006397612978616962, "clip_ratio/low_mean": 0.0002575806945515069, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008973420044640079, "epoch": 14.615743440233237, "grad_norm": 0.22202369570732117, "learning_rate": 1e-06, "loss": -0.0604, "step": 1522 }, { "clip_ratio/high_max": 0.0016988092538667843, "clip_ratio/high_mean": 0.0005703534679923905, "clip_ratio/low_mean": 0.0004060049104737118, "clip_ratio/low_min": 1.2230920219735708e-05, "clip_ratio/region_mean": 0.0009763583911990281, "epoch": 14.625072886297376, "grad_norm": 0.6364647150039673, "learning_rate": 1e-06, "loss": -0.0107, "step": 1523 }, { "clip_ratio/high_max": 0.001751845844410127, "clip_ratio/high_mean": 0.0006551328124260181, "clip_ratio/low_mean": 0.000358367724402342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010135005686606746, "epoch": 14.634402332361516, "grad_norm": 0.2614986300468445, "learning_rate": 1e-06, "loss": -0.0481, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1174665178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3687.0, "completions/mean_length": 964.2586669921875, "completions/mean_terminated_length": 547.4192504882812, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 14.643731778425655, "grad_norm": 0.22009484469890594, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 851503418.0, "reward": 0.6506696939468384, "reward_std": 0.12944675981998444, "rewards/simpleverify_reward/mean": 0.6506696343421936, "rewards/simpleverify_reward/std": 0.4768250286579132, "step": 1525 }, { "clip_ratio/high_max": 0.001847645118687069, "clip_ratio/high_mean": 0.0006489855732070282, "clip_ratio/low_mean": 0.000403790790187486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010527763624850195, "epoch": 14.653061224489797, "grad_norm": 0.29605633020401, "learning_rate": 1e-06, "loss": -0.0488, "step": 1526 }, { "clip_ratio/high_max": 0.0021392859816842247, "clip_ratio/high_mean": 0.0007754933430987876, "clip_ratio/low_mean": 0.0005310648105023574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013065581733826548, "epoch": 14.662390670553936, "grad_norm": 0.2626442015171051, "learning_rate": 1e-06, "loss": -0.0841, "step": 1527 }, { "clip_ratio/high_max": 0.0016690422744431999, "clip_ratio/high_mean": 0.0006155456730994047, "clip_ratio/low_mean": 0.0004719517314697441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010874974141188432, "epoch": 14.671720116618076, "grad_norm": 0.20452632009983063, "learning_rate": 1e-06, "loss": -0.0459, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 988.58935546875, "completions/mean_terminated_length": 558.2108764648438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 14.681049562682215, "grad_norm": 0.2336050420999527, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 853629426.0, "reward": 0.650390625, "reward_std": 0.1213955506682396, "rewards/simpleverify_reward/mean": 0.650390625, "rewards/simpleverify_reward/std": 0.47691309452056885, "step": 1529 }, { "clip_ratio/high_max": 0.0016522523837920744, "clip_ratio/high_mean": 0.0006012308476783801, "clip_ratio/low_mean": 0.00029345238544919994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008946832458605058, "epoch": 14.690379008746355, "grad_norm": 0.1907612532377243, "learning_rate": 1e-06, "loss": -0.0729, "step": 1530 }, { "clip_ratio/high_max": 0.0018796650838339701, "clip_ratio/high_mean": 0.0006443582778956625, "clip_ratio/low_mean": 0.00038208587511689984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010264441516483203, "epoch": 14.699708454810496, "grad_norm": 0.20349134504795074, "learning_rate": 1e-06, "loss": -0.0597, "step": 1531 }, { "clip_ratio/high_max": 0.002037887607002631, "clip_ratio/high_mean": 0.0007395272787107388, "clip_ratio/low_mean": 0.0003416582467252738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010811854772327933, "epoch": 14.709037900874636, "grad_norm": 2.2707295417785645, "learning_rate": 1e-06, "loss": -0.0875, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.099609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 893.4406127929688, "completions/mean_terminated_length": 539.1444091796875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.718367346938775, "grad_norm": 0.2371624857187271, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 855737389.0, "reward": 0.6894531846046448, "reward_std": 0.12844431400299072, "rewards/simpleverify_reward/mean": 0.689453125, "rewards/simpleverify_reward/std": 0.462782084941864, "step": 1533 }, { "clip_ratio/high_max": 0.002277500334457727, "clip_ratio/high_mean": 0.000710401313881448, "clip_ratio/low_mean": 0.00031073373293111217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010211350290774135, "epoch": 14.727696793002915, "grad_norm": 0.7329742312431335, "learning_rate": 1e-06, "loss": -0.067, "step": 1534 }, { "clip_ratio/high_max": 0.0016415313439210877, "clip_ratio/high_mean": 0.00059917675662291, "clip_ratio/low_mean": 0.00046225591586335213, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010614326820359565, "epoch": 14.737026239067056, "grad_norm": 0.33930861949920654, "learning_rate": 1e-06, "loss": -0.0326, "step": 1535 }, { "clip_ratio/high_max": 0.0022400584784918465, "clip_ratio/high_mean": 0.0007405665928672533, "clip_ratio/low_mean": 0.0004523958014033269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001192962397908559, "epoch": 14.746355685131196, "grad_norm": 0.20876266062259674, "learning_rate": 1e-06, "loss": -0.0696, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1130022321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3822.0, "completions/mean_length": 959.3234252929688, "completions/mean_terminated_length": 559.71533203125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.755685131195335, "grad_norm": 0.2641276717185974, "learning_rate": 1e-06, "loss": -0.0203, "num_tokens": 857896508.0, "reward": 0.6436942219734192, "reward_std": 0.1429843157529831, "rewards/simpleverify_reward/mean": 0.6436942219734192, "rewards/simpleverify_reward/std": 0.4789738953113556, "step": 1537 }, { "clip_ratio/high_max": 0.0019419903510424774, "clip_ratio/high_mean": 0.0007168242236730293, "clip_ratio/low_mean": 0.0004587269195326371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011755511368392035, "epoch": 14.765014577259475, "grad_norm": 0.318342387676239, "learning_rate": 1e-06, "loss": -0.0506, "step": 1538 }, { "clip_ratio/high_max": 0.0021026182766945567, "clip_ratio/high_mean": 0.0007404196512652561, "clip_ratio/low_mean": 0.0005251389620752889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012655586106120609, "epoch": 14.774344023323614, "grad_norm": 0.23896174132823944, "learning_rate": 1e-06, "loss": -0.0582, "step": 1539 }, { "clip_ratio/high_max": 0.002166682985262014, "clip_ratio/high_mean": 0.0009173442995233927, "clip_ratio/low_mean": 0.0005186016778679914, "clip_ratio/low_min": 1.3133010725141503e-05, "clip_ratio/region_mean": 0.0014359459310071543, "epoch": 14.783673469387756, "grad_norm": 0.43713605403900146, "learning_rate": 1e-06, "loss": -0.084, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1233258928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4010.0, "completions/mean_length": 986.1096801757812, "completions/mean_terminated_length": 548.6266479492188, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.793002915451895, "grad_norm": 0.22939631342887878, "learning_rate": 1e-06, "loss": -0.0376, "num_tokens": 859979765.0, "reward": 0.6676897406578064, "reward_std": 0.1234627366065979, "rewards/simpleverify_reward/mean": 0.6676897406578064, "rewards/simpleverify_reward/std": 0.4711073040962219, "step": 1541 }, { "clip_ratio/high_max": 0.0017176447436213493, "clip_ratio/high_mean": 0.0006021240415066131, "clip_ratio/low_mean": 0.00031006343238004774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009121874700213084, "epoch": 14.802332361516035, "grad_norm": 1.1724590063095093, "learning_rate": 1e-06, "loss": -0.0665, "step": 1542 }, { "clip_ratio/high_max": 0.0018572095505078323, "clip_ratio/high_mean": 0.0006837701675976859, "clip_ratio/low_mean": 0.00031779890559846535, "clip_ratio/low_min": 1.44877139973687e-05, "clip_ratio/region_mean": 0.0010015690786531195, "epoch": 14.811661807580174, "grad_norm": 0.21610163152217865, "learning_rate": 1e-06, "loss": -0.0597, "step": 1543 }, { "clip_ratio/high_max": 0.002004118447075598, "clip_ratio/high_mean": 0.0008187224593712017, "clip_ratio/low_mean": 0.0004140201122027065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012327425502007827, "epoch": 14.820991253644316, "grad_norm": 0.23821130394935608, "learning_rate": 1e-06, "loss": -0.0481, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1277901785714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3795.0, "completions/mean_length": 1017.9590454101562, "completions/mean_terminated_length": 566.985595703125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.830320699708455, "grad_norm": 0.2505877912044525, "learning_rate": 1e-06, "loss": -0.0375, "num_tokens": 862111474.0, "reward": 0.652901828289032, "reward_std": 0.12802860140800476, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.4761137068271637, "step": 1545 }, { "clip_ratio/high_max": 0.0015149488026509061, "clip_ratio/high_mean": 0.0005636097612295998, "clip_ratio/low_mean": 0.0002896779519687698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000853287707286654, "epoch": 14.839650145772595, "grad_norm": 0.18665611743927002, "learning_rate": 1e-06, "loss": -0.0857, "step": 1546 }, { "clip_ratio/high_max": 0.0015173944011621643, "clip_ratio/high_mean": 0.0005724406273657223, "clip_ratio/low_mean": 0.0004088763791969541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009813169726839988, "epoch": 14.848979591836734, "grad_norm": 0.2368297576904297, "learning_rate": 1e-06, "loss": -0.0564, "step": 1547 }, { "clip_ratio/high_max": 0.0015659510354453232, "clip_ratio/high_mean": 0.0005985265952404006, "clip_ratio/low_mean": 0.00041022252435141127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010087490882142447, "epoch": 14.858309037900874, "grad_norm": 0.2216494381427765, "learning_rate": 1e-06, "loss": -0.0235, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1169084821428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3767.0, "completions/mean_length": 941.9869384765625, "completions/mean_terminated_length": 524.4413452148438, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.867638483965015, "grad_norm": 0.23349584639072418, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 864135891.0, "reward": 0.6662946939468384, "reward_std": 0.1247420534491539, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.4716016948223114, "step": 1549 }, { "clip_ratio/high_max": 0.0019703822617884725, "clip_ratio/high_mean": 0.000753259648263338, "clip_ratio/low_mean": 0.0003380575551545917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010913172081927769, "epoch": 14.876967930029155, "grad_norm": 0.24128304421901703, "learning_rate": 1e-06, "loss": -0.0665, "step": 1550 }, { "clip_ratio/high_max": 0.001816647089071921, "clip_ratio/high_mean": 0.0006781430010960321, "clip_ratio/low_mean": 0.0003946580761748919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010728011002356652, "epoch": 14.886297376093294, "grad_norm": 0.24303920567035675, "learning_rate": 1e-06, "loss": -0.054, "step": 1551 }, { "clip_ratio/high_max": 0.0018581726180855185, "clip_ratio/high_mean": 0.0006583145877812058, "clip_ratio/low_mean": 0.0004120830171814305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001070397614967078, "epoch": 14.895626822157434, "grad_norm": 0.3556899428367615, "learning_rate": 1e-06, "loss": -0.0259, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 983.3016357421875, "completions/mean_terminated_length": 547.6822509765625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.904956268221575, "grad_norm": 0.26745542883872986, "learning_rate": 1e-06, "loss": -0.0872, "num_tokens": 866199796.0, "reward": 0.6640625, "reward_std": 0.1451224982738495, "rewards/simpleverify_reward/mean": 0.6640625, "rewards/simpleverify_reward/std": 0.472383052110672, "step": 1553 }, { "clip_ratio/high_max": 0.0018350748214288615, "clip_ratio/high_mean": 0.0006775949459552066, "clip_ratio/low_mean": 0.0003944822246921831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001072077142453054, "epoch": 14.914285714285715, "grad_norm": 0.24091646075248718, "learning_rate": 1e-06, "loss": -0.0466, "step": 1554 }, { "clip_ratio/high_max": 0.002181936433771625, "clip_ratio/high_mean": 0.0008953983779065311, "clip_ratio/low_mean": 0.0003321862395750941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012275846384000033, "epoch": 14.923615160349854, "grad_norm": 0.20167486369609833, "learning_rate": 1e-06, "loss": -0.0937, "step": 1555 }, { "clip_ratio/high_max": 0.0018118642146873754, "clip_ratio/high_mean": 0.0006809782535128761, "clip_ratio/low_mean": 0.00044448417065723334, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011254623877903214, "epoch": 14.932944606413994, "grad_norm": 0.33500656485557556, "learning_rate": 1e-06, "loss": -0.0613, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1236049107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 975.1138916015625, "completions/mean_terminated_length": 534.9506225585938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.942274052478133, "grad_norm": 0.2526389956474304, "learning_rate": 1e-06, "loss": -0.0846, "num_tokens": 868246292.0, "reward": 0.6662946939468384, "reward_std": 0.12310773134231567, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.4716016352176666, "step": 1557 }, { "clip_ratio/high_max": 0.0022618406219407916, "clip_ratio/high_mean": 0.0007901416302047437, "clip_ratio/low_mean": 0.00032224217761722684, "clip_ratio/low_min": 1.800633799575735e-05, "clip_ratio/region_mean": 0.0011123837975901552, "epoch": 14.951603498542275, "grad_norm": 0.2414168268442154, "learning_rate": 1e-06, "loss": -0.0763, "step": 1558 }, { "clip_ratio/high_max": 0.0022048150131013244, "clip_ratio/high_mean": 0.0006838166154921055, "clip_ratio/low_mean": 0.0003554622508090688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010392788572062273, "epoch": 14.960932944606414, "grad_norm": 0.2396661639213562, "learning_rate": 1e-06, "loss": -0.031, "step": 1559 }, { "clip_ratio/high_max": 0.0018608281206979882, "clip_ratio/high_mean": 0.0005978700050945918, "clip_ratio/low_mean": 0.0005000304245186271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001097900421882514, "epoch": 14.970262390670554, "grad_norm": 0.218532532453537, "learning_rate": 1e-06, "loss": -0.0272, "step": 1560 }, { "epoch": 14.970262390670554, "step": 1560, "total_flos": 0.0, "train_loss": -0.03081917177627043, "train_runtime": 101643.0755, "train_samples_per_second": 14.104, "train_steps_per_second": 0.016 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 868246292, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }