diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,17863 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.895626822157434, + "eval_steps": 500, + "global_step": 1440, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.013811383928571397, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4093.0, + "completions/mean_length": 604.600341796875, + "completions/mean_terminated_length": 555.7039184570312, + "completions/min_length": 2.0, + "completions/min_terminated_length": 2.0, + "epoch": 0.009329446064139942, + "grad_norm": 0.1397363245487213, + "learning_rate": 1e-06, + "loss": -0.048, + "num_tokens": 18561316.0, + "reward": 0.4968262016773224, + "reward_std": 0.2623644471168518, + "rewards/simpleverify_reward/mean": 0.496826171875, + "rewards/simpleverify_reward/std": 0.4999985992908478, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0024105336706270464, + "clip_ratio/high_mean": 0.0011643338402791414, + "clip_ratio/low_mean": 0.0006752321896783542, + "clip_ratio/low_min": 4.17824294345337e-05, + "clip_ratio/region_mean": 0.0018395660154055804, + "epoch": 0.018658892128279883, + "grad_norm": 0.16730327904224396, + "learning_rate": 1e-06, + "loss": -0.0597, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0023721187353658024, + "clip_ratio/high_mean": 0.0010058305524580646, + "clip_ratio/low_mean": 0.0006981411743254284, + "clip_ratio/low_min": 7.500308038288495e-05, + "clip_ratio/region_mean": 0.0017039716913131997, + "epoch": 0.027988338192419825, + "grad_norm": 0.1430075615644455, + "learning_rate": 1e-06, + "loss": -0.0098, + "step": 3 + }, + { + "clip_ratio/high_max": 0.00247967795439763, + "clip_ratio/high_mean": 0.0011456298452685587, + "clip_ratio/low_mean": 0.0007949689115775982, + "clip_ratio/low_min": 5.361851071938872e-05, + "clip_ratio/region_mean": 0.001940598725923337, + "epoch": 0.037317784256559766, + "grad_norm": 0.12244745343923569, + "learning_rate": 1e-06, + "loss": -0.0126, + "step": 4 + }, + { + "clip_ratio/high_max": 0.002327150032215286, + "clip_ratio/high_mean": 0.001072493227184168, + "clip_ratio/low_mean": 0.000989349700830644, + "clip_ratio/low_min": 0.00019277110914117657, + "clip_ratio/region_mean": 0.0020618429261958227, + "epoch": 0.04664723032069971, + "grad_norm": 0.12621773779392242, + "learning_rate": 1e-06, + "loss": 0.0301, + "step": 5 + }, + { + "clip_ratio/high_max": 0.003110960067715496, + "clip_ratio/high_mean": 0.0013379114570852835, + "clip_ratio/low_mean": 0.0014299873473646585, + "clip_ratio/low_min": 0.0001926458553498378, + "clip_ratio/region_mean": 0.0027678988117258996, + "epoch": 0.05597667638483965, + "grad_norm": 0.14269208908081055, + "learning_rate": 1e-06, + "loss": 0.0275, + "step": 6 + }, + { + "clip_ratio/high_max": 0.002738219467573799, + "clip_ratio/high_mean": 0.0013427227968350053, + "clip_ratio/low_mean": 0.0014824369209236465, + "clip_ratio/low_min": 0.00021844370439794147, + "clip_ratio/region_mean": 0.0028251596813788638, + "epoch": 0.0653061224489796, + "grad_norm": 0.1470305621623993, + "learning_rate": 1e-06, + "loss": 0.0004, + "step": 7 + }, + { + "clip_ratio/high_max": 0.002615711866383208, + "clip_ratio/high_mean": 0.0013420677387330215, + "clip_ratio/low_mean": 0.0012677105605689576, + "clip_ratio/low_min": 1.3812154975312296e-05, + "clip_ratio/region_mean": 0.0026097782974829897, + "epoch": 0.07463556851311953, + "grad_norm": 0.14808005094528198, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 8 + }, + { + "clip_ratio/high_max": 0.003145890757878078, + "clip_ratio/high_mean": 0.001340907871053787, + "clip_ratio/low_mean": 0.0014410102194233332, + "clip_ratio/low_min": 0.00028794246645702515, + "clip_ratio/region_mean": 0.0027819181050290354, + "epoch": 0.08396501457725948, + "grad_norm": 0.2074601799249649, + "learning_rate": 1e-06, + "loss": 0.0545, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0027386108340579085, + "clip_ratio/high_mean": 0.0013568961912824307, + "clip_ratio/low_mean": 0.0011387128906790167, + "clip_ratio/low_min": 7.724008537479676e-05, + "clip_ratio/region_mean": 0.0024956090419436805, + "epoch": 0.09329446064139942, + "grad_norm": 0.12807653844356537, + "learning_rate": 1e-06, + "loss": 0.0025, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0025042851120815612, + "clip_ratio/high_mean": 0.0010798836501635378, + "clip_ratio/low_mean": 0.0011460282512416597, + "clip_ratio/low_min": 0.00019961351790698245, + "clip_ratio/region_mean": 0.0022259119286900386, + "epoch": 0.10262390670553936, + "grad_norm": 0.12754510343074799, + "learning_rate": 1e-06, + "loss": 0.0195, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0024633969951537438, + "clip_ratio/high_mean": 0.0011423286305216607, + "clip_ratio/low_mean": 0.0011185748808202334, + "clip_ratio/low_min": 9.579515335644828e-05, + "clip_ratio/region_mean": 0.002260903529531788, + "epoch": 0.1119533527696793, + "grad_norm": 0.12464673817157745, + "learning_rate": 1e-06, + "loss": -0.0152, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0028245924768270925, + "clip_ratio/high_mean": 0.0012071984747308306, + "clip_ratio/low_mean": 0.0009489665426372085, + "clip_ratio/low_min": 0.0002143360579793807, + "clip_ratio/region_mean": 0.0021561651010415517, + "epoch": 0.12128279883381925, + "grad_norm": 0.12157316505908966, + "learning_rate": 1e-06, + "loss": -0.055, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0021413482863863464, + "clip_ratio/high_mean": 0.0010344708316551987, + "clip_ratio/low_mean": 0.0009623366968298797, + "clip_ratio/low_min": 0.00021045880657766247, + "clip_ratio/region_mean": 0.0019968075284850784, + "epoch": 0.1306122448979592, + "grad_norm": 0.13257591426372528, + "learning_rate": 1e-06, + "loss": 0.0125, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0022448954623541795, + "clip_ratio/high_mean": 0.0010418586498417426, + "clip_ratio/low_mean": 0.0009500270971329883, + "clip_ratio/low_min": 4.0889111915021203e-05, + "clip_ratio/region_mean": 0.00199188577971654, + "epoch": 0.13994169096209913, + "grad_norm": 0.12219729274511337, + "learning_rate": 1e-06, + "loss": 0.0268, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0023817530236556195, + "clip_ratio/high_mean": 0.001076888573152246, + "clip_ratio/low_mean": 0.0010004579180531437, + "clip_ratio/low_min": 0.00018702058241615305, + "clip_ratio/region_mean": 0.0020773464857484214, + "epoch": 0.14927113702623906, + "grad_norm": 0.12375976145267487, + "learning_rate": 1e-06, + "loss": -0.0197, + "step": 16 + }, + { + "clip_ratio/high_max": 0.002389628625678597, + "clip_ratio/high_mean": 0.0011601394489844097, + "clip_ratio/low_mean": 0.000944834671827266, + "clip_ratio/low_min": 0.00019620048169599613, + "clip_ratio/region_mean": 0.002104974104440771, + "epoch": 0.158600583090379, + "grad_norm": 0.13005965948104858, + "learning_rate": 1e-06, + "loss": 0.0155, + "step": 17 + }, + { + "clip_ratio/high_max": 0.002444075100356713, + "clip_ratio/high_mean": 0.0010035866580437869, + "clip_ratio/low_mean": 0.0011323059152346104, + "clip_ratio/low_min": 0.0002343704682061798, + "clip_ratio/region_mean": 0.0021358925296226516, + "epoch": 0.16793002915451896, + "grad_norm": 0.11367172002792358, + "learning_rate": 1e-06, + "loss": 0.0525, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0028036852600052953, + "clip_ratio/high_mean": 0.00110845327435527, + "clip_ratio/low_mean": 0.0010357285573263653, + "clip_ratio/low_min": 0.00016398551724705612, + "clip_ratio/region_mean": 0.0021441818826133385, + "epoch": 0.1772594752186589, + "grad_norm": 0.12272893637418747, + "learning_rate": 1e-06, + "loss": 0.0291, + "step": 19 + }, + { + "clip_ratio/high_max": 0.002739102936175186, + "clip_ratio/high_mean": 0.0012600917070813011, + "clip_ratio/low_mean": 0.0009373848279210506, + "clip_ratio/low_min": 4.1304773731098976e-05, + "clip_ratio/region_mean": 0.0021974765477352776, + "epoch": 0.18658892128279883, + "grad_norm": 0.12404225766658783, + "learning_rate": 1e-06, + "loss": -0.0447, + "step": 20 + }, + { + "clip_ratio/high_max": 0.002547814503486734, + "clip_ratio/high_mean": 0.0011831440642708912, + "clip_ratio/low_mean": 0.0011259725797572173, + "clip_ratio/low_min": 0.00013308052984939422, + "clip_ratio/region_mean": 0.0023091166876838543, + "epoch": 0.19591836734693877, + "grad_norm": 0.13162827491760254, + "learning_rate": 1e-06, + "loss": 0.0289, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0024744910915615037, + "clip_ratio/high_mean": 0.001208832789416192, + "clip_ratio/low_mean": 0.0011467382500995882, + "clip_ratio/low_min": 0.00014194786672305781, + "clip_ratio/region_mean": 0.002355571064981632, + "epoch": 0.20524781341107873, + "grad_norm": 0.11738279461860657, + "learning_rate": 1e-06, + "loss": -0.0238, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0028532593059935607, + "clip_ratio/high_mean": 0.0012577767956827302, + "clip_ratio/low_mean": 0.001016283655189909, + "clip_ratio/low_min": 7.209397517726757e-05, + "clip_ratio/region_mean": 0.0022740604690625332, + "epoch": 0.21457725947521866, + "grad_norm": 0.12402624636888504, + "learning_rate": 1e-06, + "loss": -0.0227, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0025150998117169365, + "clip_ratio/high_mean": 0.001034343171340879, + "clip_ratio/low_mean": 0.0011613182941800915, + "clip_ratio/low_min": 0.00020887598293484189, + "clip_ratio/region_mean": 0.0021956614291411825, + "epoch": 0.2239067055393586, + "grad_norm": 0.13170954585075378, + "learning_rate": 1e-06, + "loss": -0.014, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0026861606020247564, + "clip_ratio/high_mean": 0.0012759202218148857, + "clip_ratio/low_mean": 0.0012954406629432924, + "clip_ratio/low_min": 0.00022114499461167725, + "clip_ratio/region_mean": 0.0025713608774822205, + "epoch": 0.23323615160349853, + "grad_norm": 0.12683427333831787, + "learning_rate": 1e-06, + "loss": 0.0117, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0024499935971107334, + "clip_ratio/high_mean": 0.0011090274092566688, + "clip_ratio/low_mean": 0.001383901217195671, + "clip_ratio/low_min": 0.0003855242366626044, + "clip_ratio/region_mean": 0.0024929286373662762, + "epoch": 0.2425655976676385, + "grad_norm": 0.12441050261259079, + "learning_rate": 1e-06, + "loss": 0.017, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0027391781550250016, + "clip_ratio/high_mean": 0.001410774842952378, + "clip_ratio/low_mean": 0.0013861599618394393, + "clip_ratio/low_min": 0.0002453724773658905, + "clip_ratio/region_mean": 0.002796934793877881, + "epoch": 0.2518950437317784, + "grad_norm": 0.13218040764331818, + "learning_rate": 1e-06, + "loss": 0.0027, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0027316369305481203, + "clip_ratio/high_mean": 0.0012518592266133055, + "clip_ratio/low_mean": 0.0013879930957045872, + "clip_ratio/low_min": 0.0002513652771085617, + "clip_ratio/region_mean": 0.0026398523114039563, + "epoch": 0.2612244897959184, + "grad_norm": 0.12012336403131485, + "learning_rate": 1e-06, + "loss": 0.022, + "step": 28 + }, + { + "clip_ratio/high_max": 0.00247526651219232, + "clip_ratio/high_mean": 0.0012796688424714375, + "clip_ratio/low_mean": 0.001452852968213847, + "clip_ratio/low_min": 0.00023619042349309893, + "clip_ratio/region_mean": 0.002732521796133369, + "epoch": 0.2705539358600583, + "grad_norm": 0.12793003022670746, + "learning_rate": 1e-06, + "loss": -0.0111, + "step": 29 + }, + { + "clip_ratio/high_max": 0.002717217183089815, + "clip_ratio/high_mean": 0.0012803186491510132, + "clip_ratio/low_mean": 0.001327859441516921, + "clip_ratio/low_min": 0.00014889834528730717, + "clip_ratio/region_mean": 0.002608178161608521, + "epoch": 0.27988338192419826, + "grad_norm": 0.12293572723865509, + "learning_rate": 1e-06, + "loss": -0.0498, + "step": 30 + }, + { + "clip_ratio/high_max": 0.002836730760463979, + "clip_ratio/high_mean": 0.0012897639244329184, + "clip_ratio/low_mean": 0.001453427266824292, + "clip_ratio/low_min": 0.0002986752597280429, + "clip_ratio/region_mean": 0.0027431912603788078, + "epoch": 0.2892128279883382, + "grad_norm": 0.12500274181365967, + "learning_rate": 1e-06, + "loss": 0.0282, + "step": 31 + }, + { + "clip_ratio/high_max": 0.002470735300448723, + "clip_ratio/high_mean": 0.0011536918573256116, + "clip_ratio/low_mean": 0.0013952195295132697, + "clip_ratio/low_min": 0.00020339445291028824, + "clip_ratio/region_mean": 0.0025489114195806906, + "epoch": 0.29854227405247813, + "grad_norm": 0.13123559951782227, + "learning_rate": 1e-06, + "loss": 0.0281, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.015973772321428603, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4071.0, + "completions/mean_length": 605.833984375, + "completions/mean_terminated_length": 549.1778564453125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 0.30787172011661806, + "grad_norm": 0.1392851620912552, + "learning_rate": 1e-06, + "loss": 0.0085, + "num_tokens": 36893228.0, + "reward": 0.5213449001312256, + "reward_std": 0.2461894452571869, + "rewards/simpleverify_reward/mean": 0.5213448405265808, + "rewards/simpleverify_reward/std": 0.4995529055595398, + "step": 33 + }, + { + "clip_ratio/high_max": 0.002154866528144339, + "clip_ratio/high_mean": 0.0009558827587170526, + "clip_ratio/low_mean": 0.0006220669692993397, + "clip_ratio/low_min": 4.9355419832863845e-05, + "clip_ratio/region_mean": 0.0015779497516632546, + "epoch": 0.317201166180758, + "grad_norm": 0.14256572723388672, + "learning_rate": 1e-06, + "loss": -0.0135, + "step": 34 + }, + { + "clip_ratio/high_max": 0.002520695641578641, + "clip_ratio/high_mean": 0.0010729252098826692, + "clip_ratio/low_mean": 0.0006731027169735171, + "clip_ratio/low_min": 5.905377838644199e-05, + "clip_ratio/region_mean": 0.0017460279559600167, + "epoch": 0.32653061224489793, + "grad_norm": 0.14906616508960724, + "learning_rate": 1e-06, + "loss": 0.0183, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0023729700624244288, + "clip_ratio/high_mean": 0.0010100933868670836, + "clip_ratio/low_mean": 0.000690194572598557, + "clip_ratio/low_min": 6.126456173660699e-05, + "clip_ratio/region_mean": 0.0017002879903884605, + "epoch": 0.3358600583090379, + "grad_norm": 0.13483555614948273, + "learning_rate": 1e-06, + "loss": -0.0105, + "step": 36 + }, + { + "clip_ratio/high_max": 0.002428283325571101, + "clip_ratio/high_mean": 0.0010372213218943216, + "clip_ratio/low_mean": 0.0006968513553147204, + "clip_ratio/low_min": 8.627349325251998e-05, + "clip_ratio/region_mean": 0.0017340726844849996, + "epoch": 0.34518950437317786, + "grad_norm": 0.12697209417819977, + "learning_rate": 1e-06, + "loss": 0.035, + "step": 37 + }, + { + "clip_ratio/high_max": 0.002426805476716254, + "clip_ratio/high_mean": 0.0010459239347255789, + "clip_ratio/low_mean": 0.0008609316391812172, + "clip_ratio/low_min": 2.8912171728734393e-05, + "clip_ratio/region_mean": 0.0019068555993726477, + "epoch": 0.3545189504373178, + "grad_norm": 0.12941433489322662, + "learning_rate": 1e-06, + "loss": -0.059, + "step": 38 + }, + { + "clip_ratio/high_max": 0.002500117028830573, + "clip_ratio/high_mean": 0.0009735620005812962, + "clip_ratio/low_mean": 0.0010004887990362477, + "clip_ratio/low_min": 0.00021374362040660344, + "clip_ratio/region_mean": 0.001974050741409883, + "epoch": 0.3638483965014577, + "grad_norm": 0.12562952935695648, + "learning_rate": 1e-06, + "loss": 0.004, + "step": 39 + }, + { + "clip_ratio/high_max": 0.002515172745916061, + "clip_ratio/high_mean": 0.0010752747330116108, + "clip_ratio/low_mean": 0.0009461420777370222, + "clip_ratio/low_min": 0.00011673462176986504, + "clip_ratio/region_mean": 0.0020214168616803363, + "epoch": 0.37317784256559766, + "grad_norm": 0.13119079172611237, + "learning_rate": 1e-06, + "loss": 0.0267, + "step": 40 + }, + { + "clip_ratio/high_max": 0.002653695162734948, + "clip_ratio/high_mean": 0.0011458707340352703, + "clip_ratio/low_mean": 0.0010781829259940423, + "clip_ratio/low_min": 0.0001908422373162466, + "clip_ratio/region_mean": 0.0022240536200115457, + "epoch": 0.3825072886297376, + "grad_norm": 0.1273055523633957, + "learning_rate": 1e-06, + "loss": 0.0333, + "step": 41 + }, + { + "clip_ratio/high_max": 0.002564333553891629, + "clip_ratio/high_mean": 0.001194845957797952, + "clip_ratio/low_mean": 0.0013181191789044533, + "clip_ratio/low_min": 0.0002340795017516939, + "clip_ratio/region_mean": 0.0025129651476163417, + "epoch": 0.39183673469387753, + "grad_norm": 0.13553135097026825, + "learning_rate": 1e-06, + "loss": 0.0488, + "step": 42 + }, + { + "clip_ratio/high_max": 0.002800189373374451, + "clip_ratio/high_mean": 0.0012944095906277653, + "clip_ratio/low_mean": 0.0011908328633580822, + "clip_ratio/low_min": 0.00015302571773645468, + "clip_ratio/region_mean": 0.002485242410330102, + "epoch": 0.40116618075801747, + "grad_norm": 0.1388290971517563, + "learning_rate": 1e-06, + "loss": -0.0287, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0025861069443635643, + "clip_ratio/high_mean": 0.0011626795130723622, + "clip_ratio/low_mean": 0.0012623116344911978, + "clip_ratio/low_min": 0.00012071824676240794, + "clip_ratio/region_mean": 0.0024249911366496235, + "epoch": 0.41049562682215746, + "grad_norm": 0.13290996849536896, + "learning_rate": 1e-06, + "loss": 0.0106, + "step": 44 + }, + { + "clip_ratio/high_max": 0.002650840317073744, + "clip_ratio/high_mean": 0.0012656280014198273, + "clip_ratio/low_mean": 0.0010785593658511061, + "clip_ratio/low_min": 8.578704819228733e-05, + "clip_ratio/region_mean": 0.0023441873709089123, + "epoch": 0.4198250728862974, + "grad_norm": 0.12145407497882843, + "learning_rate": 1e-06, + "loss": -0.0204, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0027479734053486027, + "clip_ratio/high_mean": 0.001259499247680651, + "clip_ratio/low_mean": 0.0013141406761860708, + "clip_ratio/low_min": 0.00020944891184626613, + "clip_ratio/region_mean": 0.0025736399620654993, + "epoch": 0.4291545189504373, + "grad_norm": 0.1341453492641449, + "learning_rate": 1e-06, + "loss": 0.0245, + "step": 46 + }, + { + "clip_ratio/high_max": 0.002799942667479627, + "clip_ratio/high_mean": 0.001302520242461469, + "clip_ratio/low_mean": 0.001102146818084293, + "clip_ratio/low_min": 0.00012456459808163345, + "clip_ratio/region_mean": 0.002404667073278688, + "epoch": 0.43848396501457726, + "grad_norm": 0.13179446756839752, + "learning_rate": 1e-06, + "loss": -0.0716, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0019659979589050636, + "clip_ratio/high_mean": 0.0009074569661606802, + "clip_ratio/low_mean": 0.0010306584172212752, + "clip_ratio/low_min": 0.0001766020905051846, + "clip_ratio/region_mean": 0.0019381153761059977, + "epoch": 0.4478134110787172, + "grad_norm": 0.11361198127269745, + "learning_rate": 1e-06, + "loss": 0.0251, + "step": 48 + }, + { + "clip_ratio/high_max": 0.002903555490775034, + "clip_ratio/high_mean": 0.001160112125944579, + "clip_ratio/low_mean": 0.0010674140103219543, + "clip_ratio/low_min": 0.00013766964366368484, + "clip_ratio/region_mean": 0.002227526143542491, + "epoch": 0.45714285714285713, + "grad_norm": 0.12459854781627655, + "learning_rate": 1e-06, + "loss": -0.0059, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0021034674427937716, + "clip_ratio/high_mean": 0.0009915491064020898, + "clip_ratio/low_mean": 0.0010019534202001523, + "clip_ratio/low_min": 8.00598563728272e-05, + "clip_ratio/region_mean": 0.001993502512050327, + "epoch": 0.46647230320699706, + "grad_norm": 0.1285279095172882, + "learning_rate": 1e-06, + "loss": 0.045, + "step": 50 + }, + { + "clip_ratio/high_max": 0.002185424615163356, + "clip_ratio/high_mean": 0.0010555866901995614, + "clip_ratio/low_mean": 0.0010579091213003267, + "clip_ratio/low_min": 0.00010131898125109728, + "clip_ratio/region_mean": 0.0021134958078619093, + "epoch": 0.47580174927113705, + "grad_norm": 0.12838125228881836, + "learning_rate": 1e-06, + "loss": -0.0143, + "step": 51 + }, + { + "clip_ratio/high_max": 0.00245238406205317, + "clip_ratio/high_mean": 0.0010763966020022053, + "clip_ratio/low_mean": 0.0010526140322326683, + "clip_ratio/low_min": 0.00015946100575092714, + "clip_ratio/region_mean": 0.002129010666976683, + "epoch": 0.485131195335277, + "grad_norm": 0.12845982611179352, + "learning_rate": 1e-06, + "loss": 0.001, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0025971759750973433, + "clip_ratio/high_mean": 0.0010989311995217577, + "clip_ratio/low_mean": 0.000848133853651234, + "clip_ratio/low_min": 4.5184184273239225e-05, + "clip_ratio/region_mean": 0.0019470650586299598, + "epoch": 0.4944606413994169, + "grad_norm": 0.12556153535842896, + "learning_rate": 1e-06, + "loss": 0.0073, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0018824939033947885, + "clip_ratio/high_mean": 0.0008808384827716509, + "clip_ratio/low_mean": 0.0011637852112471592, + "clip_ratio/low_min": 0.0001357701694360003, + "clip_ratio/region_mean": 0.0020446236740099266, + "epoch": 0.5037900874635568, + "grad_norm": 0.12901367247104645, + "learning_rate": 1e-06, + "loss": 0.0621, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0025989841306000017, + "clip_ratio/high_mean": 0.0010859335525310598, + "clip_ratio/low_mean": 0.0011131775827379897, + "clip_ratio/low_min": 0.00014724131960974773, + "clip_ratio/region_mean": 0.00219911116437288, + "epoch": 0.5131195335276968, + "grad_norm": 0.13426685333251953, + "learning_rate": 1e-06, + "loss": 0.0392, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0026810664203367196, + "clip_ratio/high_mean": 0.001214465457451297, + "clip_ratio/low_mean": 0.0010145335327251814, + "clip_ratio/low_min": 0.00021548767927015433, + "clip_ratio/region_mean": 0.002228998993814457, + "epoch": 0.5224489795918368, + "grad_norm": 0.13487224280834198, + "learning_rate": 1e-06, + "loss": -0.0119, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0027388824746594764, + "clip_ratio/high_mean": 0.0012344783390290104, + "clip_ratio/low_mean": 0.0011397648631827906, + "clip_ratio/low_min": 6.230076996871503e-05, + "clip_ratio/region_mean": 0.002374243202211801, + "epoch": 0.5317784256559767, + "grad_norm": 0.1314925104379654, + "learning_rate": 1e-06, + "loss": -0.0286, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0025781981530599296, + "clip_ratio/high_mean": 0.0012049272190779448, + "clip_ratio/low_mean": 0.0013043091021245345, + "clip_ratio/low_min": 0.00019696603885677177, + "clip_ratio/region_mean": 0.002509236292098649, + "epoch": 0.5411078717201167, + "grad_norm": 0.12213936448097229, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 58 + }, + { + "clip_ratio/high_max": 0.002602761422167532, + "clip_ratio/high_mean": 0.001203188716317527, + "clip_ratio/low_mean": 0.001175208184577059, + "clip_ratio/low_min": 6.338120056170737e-05, + "clip_ratio/region_mean": 0.0023783968717907555, + "epoch": 0.5504373177842565, + "grad_norm": 0.1302884817123413, + "learning_rate": 1e-06, + "loss": 0.003, + "step": 59 + }, + { + "clip_ratio/high_max": 0.00260514669935219, + "clip_ratio/high_mean": 0.0012445204774849117, + "clip_ratio/low_mean": 0.001062608298525447, + "clip_ratio/low_min": 0.0001771707375155529, + "clip_ratio/region_mean": 0.0023071288014762104, + "epoch": 0.5597667638483965, + "grad_norm": 0.12155602127313614, + "learning_rate": 1e-06, + "loss": -0.0112, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0026845285610761493, + "clip_ratio/high_mean": 0.0012388419709168375, + "clip_ratio/low_mean": 0.0011018094155588187, + "clip_ratio/low_min": 0.00012176146992715076, + "clip_ratio/region_mean": 0.00234065131371608, + "epoch": 0.5690962099125364, + "grad_norm": 0.11204208433628082, + "learning_rate": 1e-06, + "loss": -0.0361, + "step": 61 + }, + { + "clip_ratio/high_max": 0.0023121083868318237, + "clip_ratio/high_mean": 0.0011512128767208196, + "clip_ratio/low_mean": 0.0011560534112504683, + "clip_ratio/low_min": 0.00012153263014624827, + "clip_ratio/region_mean": 0.0023072662224876694, + "epoch": 0.5784256559766764, + "grad_norm": 0.12549570202827454, + "learning_rate": 1e-06, + "loss": 0.0251, + "step": 62 + }, + { + "clip_ratio/high_max": 0.0026555501099210232, + "clip_ratio/high_mean": 0.0012307019424042664, + "clip_ratio/low_mean": 0.0012997674675716553, + "clip_ratio/low_min": 0.0002600036677904427, + "clip_ratio/region_mean": 0.002530469442717731, + "epoch": 0.5877551020408164, + "grad_norm": 0.13226014375686646, + "learning_rate": 1e-06, + "loss": 0.0108, + "step": 63 + }, + { + "clip_ratio/high_max": 0.002542233094573021, + "clip_ratio/high_mean": 0.0011151730759593192, + "clip_ratio/low_mean": 0.001162676668172935, + "clip_ratio/low_min": 9.582073016645154e-05, + "clip_ratio/region_mean": 0.0022778497732360847, + "epoch": 0.5970845481049563, + "grad_norm": 0.1157546266913414, + "learning_rate": 1e-06, + "loss": -0.0076, + "step": 64 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.014927455357142905, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4057.0, + "completions/mean_length": 616.0746459960938, + "completions/mean_terminated_length": 563.341064453125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 0.6064139941690962, + "grad_norm": 0.13193269073963165, + "learning_rate": 1e-06, + "loss": 0.0043, + "num_tokens": 55696457.0, + "reward": 0.5386091470718384, + "reward_std": 0.23196251690387726, + "rewards/simpleverify_reward/mean": 0.5386090874671936, + "rewards/simpleverify_reward/std": 0.498515784740448, + "step": 65 + }, + { + "clip_ratio/high_max": 0.002328980182937812, + "clip_ratio/high_mean": 0.0009799439176276792, + "clip_ratio/low_mean": 0.0006488139661087189, + "clip_ratio/low_min": 5.3973290050635114e-05, + "clip_ratio/region_mean": 0.001628757905564271, + "epoch": 0.6157434402332361, + "grad_norm": 0.1179637759923935, + "learning_rate": 1e-06, + "loss": -0.0024, + "step": 66 + }, + { + "clip_ratio/high_max": 0.002294747951964382, + "clip_ratio/high_mean": 0.0009669599385233596, + "clip_ratio/low_mean": 0.0005805053824587958, + "clip_ratio/low_min": 2.3299446183955297e-05, + "clip_ratio/region_mean": 0.0015474653046112508, + "epoch": 0.6250728862973761, + "grad_norm": 0.12232931703329086, + "learning_rate": 1e-06, + "loss": -0.0121, + "step": 67 + }, + { + "clip_ratio/high_max": 0.0019414879061514512, + "clip_ratio/high_mean": 0.0008062621145654703, + "clip_ratio/low_mean": 0.0006693491923215333, + "clip_ratio/low_min": 9.827238864090759e-05, + "clip_ratio/region_mean": 0.0014756113305338658, + "epoch": 0.634402332361516, + "grad_norm": 0.12383954226970673, + "learning_rate": 1e-06, + "loss": 0.0716, + "step": 68 + }, + { + "clip_ratio/high_max": 0.002290696393174585, + "clip_ratio/high_mean": 0.0010502979894226883, + "clip_ratio/low_mean": 0.0006360281422530534, + "clip_ratio/low_min": 3.04950572171947e-05, + "clip_ratio/region_mean": 0.0016863261116668582, + "epoch": 0.643731778425656, + "grad_norm": 0.12718434631824493, + "learning_rate": 1e-06, + "loss": 0.0009, + "step": 69 + }, + { + "clip_ratio/high_max": 0.0023595283746544737, + "clip_ratio/high_mean": 0.0010088914568768814, + "clip_ratio/low_mean": 0.000681449559124303, + "clip_ratio/low_min": 6.201896030688658e-05, + "clip_ratio/region_mean": 0.001690341036010068, + "epoch": 0.6530612244897959, + "grad_norm": 0.12831735610961914, + "learning_rate": 1e-06, + "loss": -0.0054, + "step": 70 + }, + { + "clip_ratio/high_max": 0.002613749631564133, + "clip_ratio/high_mean": 0.0012710145165328868, + "clip_ratio/low_mean": 0.0006204042911122087, + "clip_ratio/low_min": 3.946598280890612e-05, + "clip_ratio/region_mean": 0.001891418818559032, + "epoch": 0.6623906705539359, + "grad_norm": 0.11891865730285645, + "learning_rate": 1e-06, + "loss": -0.0785, + "step": 71 + }, + { + "clip_ratio/high_max": 0.0019938925761380233, + "clip_ratio/high_mean": 0.0008738418837310746, + "clip_ratio/low_mean": 0.0008569636047468521, + "clip_ratio/low_min": 0.00013989033141115215, + "clip_ratio/region_mean": 0.0017308055175817572, + "epoch": 0.6717201166180758, + "grad_norm": 0.12106315791606903, + "learning_rate": 1e-06, + "loss": -0.0, + "step": 72 + }, + { + "clip_ratio/high_max": 0.0020695106395578478, + "clip_ratio/high_mean": 0.0009604874248907436, + "clip_ratio/low_mean": 0.000847232684463961, + "clip_ratio/low_min": 9.820732611842686e-05, + "clip_ratio/region_mean": 0.0018077200875268318, + "epoch": 0.6810495626822157, + "grad_norm": 0.11928822845220566, + "learning_rate": 1e-06, + "loss": -0.0151, + "step": 73 + }, + { + "clip_ratio/high_max": 0.00259551964700222, + "clip_ratio/high_mean": 0.0011883105362358037, + "clip_ratio/low_mean": 0.0009380183073517401, + "clip_ratio/low_min": 0.0001481371818954358, + "clip_ratio/region_mean": 0.0021263288144837134, + "epoch": 0.6903790087463557, + "grad_norm": 0.11544797569513321, + "learning_rate": 1e-06, + "loss": -0.0434, + "step": 74 + }, + { + "clip_ratio/high_max": 0.002432831723126583, + "clip_ratio/high_mean": 0.0010206585338892182, + "clip_ratio/low_mean": 0.0010139798687305301, + "clip_ratio/low_min": 0.00013609098277811427, + "clip_ratio/region_mean": 0.0020346384335425682, + "epoch": 0.6997084548104956, + "grad_norm": 0.11292950809001923, + "learning_rate": 1e-06, + "loss": 0.0182, + "step": 75 + }, + { + "clip_ratio/high_max": 0.002513583305699285, + "clip_ratio/high_mean": 0.0010161203063034918, + "clip_ratio/low_mean": 0.0011661909265967552, + "clip_ratio/low_min": 0.00014336886943056015, + "clip_ratio/region_mean": 0.0021823112765559927, + "epoch": 0.7090379008746356, + "grad_norm": 0.12492461502552032, + "learning_rate": 1e-06, + "loss": 0.0654, + "step": 76 + }, + { + "clip_ratio/high_max": 0.0023220856455736794, + "clip_ratio/high_mean": 0.0011234527482884005, + "clip_ratio/low_mean": 0.0011635668743110728, + "clip_ratio/low_min": 0.00017433294124202803, + "clip_ratio/region_mean": 0.002287019589857664, + "epoch": 0.7183673469387755, + "grad_norm": 0.12138062715530396, + "learning_rate": 1e-06, + "loss": 0.0237, + "step": 77 + }, + { + "clip_ratio/high_max": 0.0021567578660324216, + "clip_ratio/high_mean": 0.0010381691681686789, + "clip_ratio/low_mean": 0.0011469302444311325, + "clip_ratio/low_min": 0.0001505892769273487, + "clip_ratio/region_mean": 0.0021850994307897054, + "epoch": 0.7276967930029155, + "grad_norm": 0.14336355030536652, + "learning_rate": 1e-06, + "loss": 0.0249, + "step": 78 + }, + { + "clip_ratio/high_max": 0.002793032857880462, + "clip_ratio/high_mean": 0.0012763544618792366, + "clip_ratio/low_mean": 0.0010872337788896402, + "clip_ratio/low_min": 6.249236321309581e-05, + "clip_ratio/region_mean": 0.0023635882171220146, + "epoch": 0.7370262390670554, + "grad_norm": 0.13491053879261017, + "learning_rate": 1e-06, + "loss": -0.0055, + "step": 79 + }, + { + "clip_ratio/high_max": 0.0023751558182993904, + "clip_ratio/high_mean": 0.001017768619931303, + "clip_ratio/low_mean": 0.0011549389055289794, + "clip_ratio/low_min": 9.652048993302742e-05, + "clip_ratio/region_mean": 0.0021727075218223035, + "epoch": 0.7463556851311953, + "grad_norm": 0.12230775505304337, + "learning_rate": 1e-06, + "loss": 0.0404, + "step": 80 + }, + { + "clip_ratio/high_max": 0.002409681343124248, + "clip_ratio/high_mean": 0.0011061313925893046, + "clip_ratio/low_mean": 0.0011409010257921182, + "clip_ratio/low_min": 3.7721911212429404e-05, + "clip_ratio/region_mean": 0.0022470324402092956, + "epoch": 0.7556851311953353, + "grad_norm": 0.11919881403446198, + "learning_rate": 1e-06, + "loss": -0.0187, + "step": 81 + }, + { + "clip_ratio/high_max": 0.0023957524172146805, + "clip_ratio/high_mean": 0.0011061043751396937, + "clip_ratio/low_mean": 0.0012182273931102827, + "clip_ratio/low_min": 0.0001539431141281966, + "clip_ratio/region_mean": 0.002324331711861305, + "epoch": 0.7650145772594752, + "grad_norm": 0.12190832942724228, + "learning_rate": 1e-06, + "loss": -0.0079, + "step": 82 + }, + { + "clip_ratio/high_max": 0.0023902300745248795, + "clip_ratio/high_mean": 0.0011275278434368374, + "clip_ratio/low_mean": 0.0009957713482435793, + "clip_ratio/low_min": 0.00010872465827560518, + "clip_ratio/region_mean": 0.002123299171216786, + "epoch": 0.7743440233236152, + "grad_norm": 0.11692412942647934, + "learning_rate": 1e-06, + "loss": -0.0226, + "step": 83 + }, + { + "clip_ratio/high_max": 0.0024930099170887843, + "clip_ratio/high_mean": 0.0010037593092420138, + "clip_ratio/low_mean": 0.0011639502772595733, + "clip_ratio/low_min": 0.00019019318006030517, + "clip_ratio/region_mean": 0.00216770960832946, + "epoch": 0.7836734693877551, + "grad_norm": 0.1270606964826584, + "learning_rate": 1e-06, + "loss": 0.0167, + "step": 84 + }, + { + "clip_ratio/high_max": 0.002333626318431925, + "clip_ratio/high_mean": 0.0010789451935124816, + "clip_ratio/low_mean": 0.0011098564245912712, + "clip_ratio/low_min": 0.00016086923460534308, + "clip_ratio/region_mean": 0.002188801568991039, + "epoch": 0.793002915451895, + "grad_norm": 0.13270266354084015, + "learning_rate": 1e-06, + "loss": 0.0276, + "step": 85 + }, + { + "clip_ratio/high_max": 0.002080409445625264, + "clip_ratio/high_mean": 0.0009758403903106228, + "clip_ratio/low_mean": 0.001007801060040947, + "clip_ratio/low_min": 0.00010497249058971647, + "clip_ratio/region_mean": 0.0019836414576275274, + "epoch": 0.8023323615160349, + "grad_norm": 0.12897315621376038, + "learning_rate": 1e-06, + "loss": 0.0203, + "step": 86 + }, + { + "clip_ratio/high_max": 0.0024895572860259563, + "clip_ratio/high_mean": 0.0010984627297148108, + "clip_ratio/low_mean": 0.0011370503343641758, + "clip_ratio/low_min": 0.00016351762496924493, + "clip_ratio/region_mean": 0.002235513020423241, + "epoch": 0.8116618075801749, + "grad_norm": 0.12234677374362946, + "learning_rate": 1e-06, + "loss": -0.0097, + "step": 87 + }, + { + "clip_ratio/high_max": 0.002670867368578911, + "clip_ratio/high_mean": 0.0011942313103645574, + "clip_ratio/low_mean": 0.0009526853827992454, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002146916667697951, + "epoch": 0.8209912536443149, + "grad_norm": 0.12400404363870621, + "learning_rate": 1e-06, + "loss": -0.0188, + "step": 88 + }, + { + "clip_ratio/high_max": 0.0024363375050597824, + "clip_ratio/high_mean": 0.0011551420502655674, + "clip_ratio/low_mean": 0.001058746944181621, + "clip_ratio/low_min": 0.00020158013012405718, + "clip_ratio/region_mean": 0.002213888983533252, + "epoch": 0.8303206997084548, + "grad_norm": 0.12443777173757553, + "learning_rate": 1e-06, + "loss": -0.0388, + "step": 89 + }, + { + "clip_ratio/high_max": 0.0022709333352395333, + "clip_ratio/high_mean": 0.001171580715890741, + "clip_ratio/low_mean": 0.0010937659062619787, + "clip_ratio/low_min": 2.5980607460951433e-05, + "clip_ratio/region_mean": 0.002265346636704635, + "epoch": 0.8396501457725948, + "grad_norm": 0.11693009734153748, + "learning_rate": 1e-06, + "loss": -0.0212, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0024615596048533916, + "clip_ratio/high_mean": 0.001110722660087049, + "clip_ratio/low_mean": 0.001268210409762105, + "clip_ratio/low_min": 6.843203846074175e-05, + "clip_ratio/region_mean": 0.002378933015279472, + "epoch": 0.8489795918367347, + "grad_norm": 0.12528090178966522, + "learning_rate": 1e-06, + "loss": 0.0024, + "step": 91 + }, + { + "clip_ratio/high_max": 0.002672517905011773, + "clip_ratio/high_mean": 0.001037338446622016, + "clip_ratio/low_mean": 0.0012742921826429665, + "clip_ratio/low_min": 0.0001510250622231979, + "clip_ratio/region_mean": 0.002311630640178919, + "epoch": 0.8583090379008746, + "grad_norm": 0.1276797354221344, + "learning_rate": 1e-06, + "loss": 0.0289, + "step": 92 + }, + { + "clip_ratio/high_max": 0.0025825381017057225, + "clip_ratio/high_mean": 0.0010888647302635945, + "clip_ratio/low_mean": 0.0015090270280779805, + "clip_ratio/low_min": 0.0001346614990325179, + "clip_ratio/region_mean": 0.002597891798359342, + "epoch": 0.8676384839650145, + "grad_norm": 0.12030808627605438, + "learning_rate": 1e-06, + "loss": 0.0248, + "step": 93 + }, + { + "clip_ratio/high_max": 0.002218307043222012, + "clip_ratio/high_mean": 0.0010351078635721933, + "clip_ratio/low_mean": 0.0012474247851059772, + "clip_ratio/low_min": 0.00011207708666916005, + "clip_ratio/region_mean": 0.0022825326450401917, + "epoch": 0.8769679300291545, + "grad_norm": 0.11305058002471924, + "learning_rate": 1e-06, + "loss": 0.0442, + "step": 94 + }, + { + "clip_ratio/high_max": 0.0023669503134442493, + "clip_ratio/high_mean": 0.0010745935978775378, + "clip_ratio/low_mean": 0.001287968538235873, + "clip_ratio/low_min": 0.00017820822722569574, + "clip_ratio/region_mean": 0.0023625621179235168, + "epoch": 0.8862973760932945, + "grad_norm": 0.12017299979925156, + "learning_rate": 1e-06, + "loss": -0.0124, + "step": 95 + }, + { + "clip_ratio/high_max": 0.0025466951410635374, + "clip_ratio/high_mean": 0.00105575274938019, + "clip_ratio/low_mean": 0.001242694845132064, + "clip_ratio/low_min": 4.785998498846311e-05, + "clip_ratio/region_mean": 0.0022984475508565083, + "epoch": 0.8956268221574344, + "grad_norm": 0.13566216826438904, + "learning_rate": 1e-06, + "loss": 0.0247, + "step": 96 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.015973772321428603, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4094.0, + "completions/mean_length": 608.2108154296875, + "completions/mean_terminated_length": 551.5933227539062, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 1.00932944606414, + "grad_norm": 0.12515154480934143, + "learning_rate": 1e-06, + "loss": 0.01, + "num_tokens": 74133166.0, + "reward": 0.5551409125328064, + "reward_std": 0.2201182246208191, + "rewards/simpleverify_reward/mean": 0.5551409125328064, + "rewards/simpleverify_reward/std": 0.49695885181427, + "step": 97 + }, + { + "clip_ratio/high_max": 0.0022979457571636885, + "clip_ratio/high_mean": 0.0009395265988132451, + "clip_ratio/low_mean": 0.0005775715308118379, + "clip_ratio/low_min": 1.569563028169796e-05, + "clip_ratio/region_mean": 0.0015170981350820512, + "epoch": 1.01865889212828, + "grad_norm": 0.1306575983762741, + "learning_rate": 1e-06, + "loss": 0.0205, + "step": 98 + }, + { + "clip_ratio/high_max": 0.002436228205624502, + "clip_ratio/high_mean": 0.0009995441268983996, + "clip_ratio/low_mean": 0.0005524815983335429, + "clip_ratio/low_min": 5.513196447282098e-05, + "clip_ratio/region_mean": 0.0015520257002208382, + "epoch": 1.0279883381924197, + "grad_norm": 0.12664695084095, + "learning_rate": 1e-06, + "loss": -0.0136, + "step": 99 + }, + { + "clip_ratio/high_max": 0.002102709284372395, + "clip_ratio/high_mean": 0.0008546321787434863, + "clip_ratio/low_mean": 0.0005916793070355197, + "clip_ratio/low_min": 3.160142841807101e-05, + "clip_ratio/region_mean": 0.0014463114894169848, + "epoch": 1.0373177842565597, + "grad_norm": 0.12008006870746613, + "learning_rate": 1e-06, + "loss": 0.0151, + "step": 100 + }, + { + "clip_ratio/high_max": 0.0021189190629229415, + "clip_ratio/high_mean": 0.0008503895369358361, + "clip_ratio/low_mean": 0.0005945331467955839, + "clip_ratio/low_min": 3.941907471016748e-05, + "clip_ratio/region_mean": 0.0014449226728174835, + "epoch": 1.0466472303206997, + "grad_norm": 0.13553902506828308, + "learning_rate": 1e-06, + "loss": 0.0655, + "step": 101 + }, + { + "clip_ratio/high_max": 0.0021607013914035633, + "clip_ratio/high_mean": 0.0008976444769359659, + "clip_ratio/low_mean": 0.0006076663830754114, + "clip_ratio/low_min": 2.47099651460303e-05, + "clip_ratio/region_mean": 0.0015053108218126, + "epoch": 1.0559766763848397, + "grad_norm": 0.11779744178056717, + "learning_rate": 1e-06, + "loss": 0.0305, + "step": 102 + }, + { + "clip_ratio/high_max": 0.002528972690925002, + "clip_ratio/high_mean": 0.0009709200603538193, + "clip_ratio/low_mean": 0.0008465050232189242, + "clip_ratio/low_min": 0.00010089717852679314, + "clip_ratio/region_mean": 0.001817425072658807, + "epoch": 1.0653061224489795, + "grad_norm": 0.1320830136537552, + "learning_rate": 1e-06, + "loss": 0.0292, + "step": 103 + }, + { + "clip_ratio/high_max": 0.0023619561616214924, + "clip_ratio/high_mean": 0.0010557245223026257, + "clip_ratio/low_mean": 0.0007360655290540308, + "clip_ratio/low_min": 8.46889788590488e-05, + "clip_ratio/region_mean": 0.0017917900295287836, + "epoch": 1.0746355685131195, + "grad_norm": 0.12015572190284729, + "learning_rate": 1e-06, + "loss": -0.0462, + "step": 104 + }, + { + "clip_ratio/high_max": 0.002650026151968632, + "clip_ratio/high_mean": 0.001030772862577578, + "clip_ratio/low_mean": 0.0007596392151754117, + "clip_ratio/low_min": 1.6914749721763656e-05, + "clip_ratio/region_mean": 0.0017904120977618732, + "epoch": 1.0839650145772595, + "grad_norm": 0.12176768481731415, + "learning_rate": 1e-06, + "loss": -0.0513, + "step": 105 + }, + { + "clip_ratio/high_max": 0.001983782567549497, + "clip_ratio/high_mean": 0.0009059975091076922, + "clip_ratio/low_mean": 0.0009869285931927152, + "clip_ratio/low_min": 9.539515849610325e-05, + "clip_ratio/region_mean": 0.00189292603317881, + "epoch": 1.0932944606413995, + "grad_norm": 0.12015856802463531, + "learning_rate": 1e-06, + "loss": 0.0096, + "step": 106 + }, + { + "clip_ratio/high_max": 0.0022477297970908694, + "clip_ratio/high_mean": 0.0010037471292889677, + "clip_ratio/low_mean": 0.0009222691951435991, + "clip_ratio/low_min": 0.00017914639465743676, + "clip_ratio/region_mean": 0.001926016280776821, + "epoch": 1.1026239067055394, + "grad_norm": 0.1231522411108017, + "learning_rate": 1e-06, + "loss": -0.0026, + "step": 107 + }, + { + "clip_ratio/high_max": 0.002572487028373871, + "clip_ratio/high_mean": 0.0011812613702204544, + "clip_ratio/low_mean": 0.0010373023178544827, + "clip_ratio/low_min": 7.952180021675304e-05, + "clip_ratio/region_mean": 0.0022185637135407887, + "epoch": 1.1119533527696792, + "grad_norm": 0.12579306960105896, + "learning_rate": 1e-06, + "loss": -0.0262, + "step": 108 + }, + { + "clip_ratio/high_max": 0.0023505327699240297, + "clip_ratio/high_mean": 0.0010666778580343816, + "clip_ratio/low_mean": 0.0009655847279645968, + "clip_ratio/low_min": 4.217499645164935e-05, + "clip_ratio/region_mean": 0.002032262593274936, + "epoch": 1.1212827988338192, + "grad_norm": 0.12550635635852814, + "learning_rate": 1e-06, + "loss": 0.0071, + "step": 109 + }, + { + "clip_ratio/high_max": 0.0021835168881807476, + "clip_ratio/high_mean": 0.001035374612911255, + "clip_ratio/low_mean": 0.0010122447183675831, + "clip_ratio/low_min": 5.1894465286750346e-05, + "clip_ratio/region_mean": 0.0020476193458307534, + "epoch": 1.1306122448979592, + "grad_norm": 0.12222646176815033, + "learning_rate": 1e-06, + "loss": -0.0129, + "step": 110 + }, + { + "clip_ratio/high_max": 0.0022354327084030956, + "clip_ratio/high_mean": 0.0010449666770000476, + "clip_ratio/low_mean": 0.0011392355554562528, + "clip_ratio/low_min": 0.00027197795679967385, + "clip_ratio/region_mean": 0.002184202225180343, + "epoch": 1.1399416909620992, + "grad_norm": 0.12912093102931976, + "learning_rate": 1e-06, + "loss": 0.0143, + "step": 111 + }, + { + "clip_ratio/high_max": 0.0025578335698810406, + "clip_ratio/high_mean": 0.0010727523331297562, + "clip_ratio/low_mean": 0.001113414178689709, + "clip_ratio/low_min": 7.72684725234285e-05, + "clip_ratio/region_mean": 0.0021861665445612743, + "epoch": 1.149271137026239, + "grad_norm": 0.12282103300094604, + "learning_rate": 1e-06, + "loss": 0.0258, + "step": 112 + }, + { + "clip_ratio/high_max": 0.0025094529046327807, + "clip_ratio/high_mean": 0.0010429745743749663, + "clip_ratio/low_mean": 0.001107412033888977, + "clip_ratio/low_min": 0.00014175496471580118, + "clip_ratio/region_mean": 0.0021503865646081977, + "epoch": 1.158600583090379, + "grad_norm": 0.12653689086437225, + "learning_rate": 1e-06, + "loss": 0.0325, + "step": 113 + }, + { + "clip_ratio/high_max": 0.00234408591495594, + "clip_ratio/high_mean": 0.001093566283088876, + "clip_ratio/low_mean": 0.0009334662954643136, + "clip_ratio/low_min": 9.101541945710778e-05, + "clip_ratio/region_mean": 0.002027032518526539, + "epoch": 1.167930029154519, + "grad_norm": 0.1202814131975174, + "learning_rate": 1e-06, + "loss": -0.0067, + "step": 114 + }, + { + "clip_ratio/high_max": 0.002573394274804741, + "clip_ratio/high_mean": 0.0011231584248889703, + "clip_ratio/low_mean": 0.0011573454685276374, + "clip_ratio/low_min": 0.00014929425742593594, + "clip_ratio/region_mean": 0.002280503911606502, + "epoch": 1.177259475218659, + "grad_norm": 0.13579046726226807, + "learning_rate": 1e-06, + "loss": 0.0051, + "step": 115 + }, + { + "clip_ratio/high_max": 0.0018269551655976102, + "clip_ratio/high_mean": 0.0008515936824551318, + "clip_ratio/low_mean": 0.0009331661731266649, + "clip_ratio/low_min": 0.00018841306246031309, + "clip_ratio/region_mean": 0.0017847598646767437, + "epoch": 1.186588921282799, + "grad_norm": 0.10874009877443314, + "learning_rate": 1e-06, + "loss": -0.0038, + "step": 116 + }, + { + "clip_ratio/high_max": 0.002437371775158681, + "clip_ratio/high_mean": 0.0010400869177829009, + "clip_ratio/low_mean": 0.0009712751398183173, + "clip_ratio/low_min": 9.253769621864194e-05, + "clip_ratio/region_mean": 0.002011362048506271, + "epoch": 1.1959183673469387, + "grad_norm": 0.1319679468870163, + "learning_rate": 1e-06, + "loss": -0.0193, + "step": 117 + }, + { + "clip_ratio/high_max": 0.0021046325637144037, + "clip_ratio/high_mean": 0.000982396406470798, + "clip_ratio/low_mean": 0.001065889733581571, + "clip_ratio/low_min": 8.660030835017096e-05, + "clip_ratio/region_mean": 0.0020482861291384324, + "epoch": 1.2052478134110787, + "grad_norm": 0.12338002026081085, + "learning_rate": 1e-06, + "loss": 0.0261, + "step": 118 + }, + { + "clip_ratio/high_max": 0.00250383437378332, + "clip_ratio/high_mean": 0.001061306054907618, + "clip_ratio/low_mean": 0.001043384545482695, + "clip_ratio/low_min": 5.890324700885685e-05, + "clip_ratio/region_mean": 0.0021046906913397834, + "epoch": 1.2145772594752187, + "grad_norm": 0.12722188234329224, + "learning_rate": 1e-06, + "loss": -0.0103, + "step": 119 + }, + { + "clip_ratio/high_max": 0.0023383466686937027, + "clip_ratio/high_mean": 0.0011093730536231305, + "clip_ratio/low_mean": 0.0010856035423785215, + "clip_ratio/low_min": 0.00018352546248934232, + "clip_ratio/region_mean": 0.0021949766305624507, + "epoch": 1.2239067055393587, + "grad_norm": 0.13765819370746613, + "learning_rate": 1e-06, + "loss": -0.0071, + "step": 120 + }, + { + "clip_ratio/high_max": 0.00228412381693488, + "clip_ratio/high_mean": 0.0010354996848036535, + "clip_ratio/low_mean": 0.0011145055323140696, + "clip_ratio/low_min": 0.00011972517313552089, + "clip_ratio/region_mean": 0.002150005275325384, + "epoch": 1.2332361516034984, + "grad_norm": 0.12663955986499786, + "learning_rate": 1e-06, + "loss": 0.0045, + "step": 121 + }, + { + "clip_ratio/high_max": 0.002686190706299385, + "clip_ratio/high_mean": 0.0012359067841316573, + "clip_ratio/low_mean": 0.0011155900865560398, + "clip_ratio/low_min": 4.857255407841876e-05, + "clip_ratio/region_mean": 0.002351496856135782, + "epoch": 1.2425655976676384, + "grad_norm": 0.13473036885261536, + "learning_rate": 1e-06, + "loss": 0.0353, + "step": 122 + }, + { + "clip_ratio/high_max": 0.0022190124800545163, + "clip_ratio/high_mean": 0.0009583649407431949, + "clip_ratio/low_mean": 0.0012937010324094445, + "clip_ratio/low_min": 0.0002862225883291103, + "clip_ratio/region_mean": 0.00225206594041083, + "epoch": 1.2518950437317784, + "grad_norm": 0.12195934355258942, + "learning_rate": 1e-06, + "loss": 0.0541, + "step": 123 + }, + { + "clip_ratio/high_max": 0.002577113380539231, + "clip_ratio/high_mean": 0.0011613152055360842, + "clip_ratio/low_mean": 0.0010268922815157566, + "clip_ratio/low_min": 0.00016467454406665638, + "clip_ratio/region_mean": 0.0021882075307075866, + "epoch": 1.2612244897959184, + "grad_norm": 0.12652873992919922, + "learning_rate": 1e-06, + "loss": -0.0178, + "step": 124 + }, + { + "clip_ratio/high_max": 0.002815144332998898, + "clip_ratio/high_mean": 0.0012095953352400102, + "clip_ratio/low_mean": 0.0011194726648682263, + "clip_ratio/low_min": 0.00016523562953807414, + "clip_ratio/region_mean": 0.0023290679382625967, + "epoch": 1.2705539358600584, + "grad_norm": 0.14093166589736938, + "learning_rate": 1e-06, + "loss": 0.0056, + "step": 125 + }, + { + "clip_ratio/high_max": 0.002683347811398562, + "clip_ratio/high_mean": 0.0011711218357959297, + "clip_ratio/low_mean": 0.0010713923256844282, + "clip_ratio/low_min": 9.582078564562835e-05, + "clip_ratio/region_mean": 0.0022425140923587605, + "epoch": 1.2798833819241984, + "grad_norm": 0.126437246799469, + "learning_rate": 1e-06, + "loss": -0.0557, + "step": 126 + }, + { + "clip_ratio/high_max": 0.0026961914845742285, + "clip_ratio/high_mean": 0.0011760315064748283, + "clip_ratio/low_mean": 0.0011478004598757252, + "clip_ratio/low_min": 0.00014482657752523664, + "clip_ratio/region_mean": 0.002323831991816405, + "epoch": 1.2892128279883381, + "grad_norm": 0.12861402332782745, + "learning_rate": 1e-06, + "loss": 0.012, + "step": 127 + }, + { + "clip_ratio/high_max": 0.0026040639204438776, + "clip_ratio/high_mean": 0.001162967051641317, + "clip_ratio/low_mean": 0.0010441786362207495, + "clip_ratio/low_min": 0.00019241467089159414, + "clip_ratio/region_mean": 0.002207145713327918, + "epoch": 1.2985422740524781, + "grad_norm": 0.11767813563346863, + "learning_rate": 1e-06, + "loss": -0.0296, + "step": 128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0185546875, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4092.0, + "completions/mean_length": 629.6763916015625, + "completions/mean_terminated_length": 564.1438598632812, + "completions/min_length": 11.0, + "completions/min_terminated_length": 11.0, + "epoch": 1.3078717201166181, + "grad_norm": 0.12247433513402939, + "learning_rate": 1e-06, + "loss": -0.0098, + "num_tokens": 92890087.0, + "reward": 0.5526646375656128, + "reward_std": 0.20904073119163513, + "rewards/simpleverify_reward/mean": 0.5526646375656128, + "rewards/simpleverify_reward/std": 0.49722740054130554, + "step": 129 + }, + { + "clip_ratio/high_max": 0.0019482800671539735, + "clip_ratio/high_mean": 0.0008655789733893471, + "clip_ratio/low_mean": 0.0005754319145125919, + "clip_ratio/low_min": 1.1814744539151434e-05, + "clip_ratio/region_mean": 0.0014410108924494125, + "epoch": 1.3172011661807579, + "grad_norm": 0.12304199486970901, + "learning_rate": 1e-06, + "loss": -0.0123, + "step": 130 + }, + { + "clip_ratio/high_max": 0.0020373328152345493, + "clip_ratio/high_mean": 0.0008123245115712052, + "clip_ratio/low_mean": 0.0005806810022477293, + "clip_ratio/low_min": 1.0511267646506894e-05, + "clip_ratio/region_mean": 0.001393005524732871, + "epoch": 1.3265306122448979, + "grad_norm": 0.12376153469085693, + "learning_rate": 1e-06, + "loss": 0.0191, + "step": 131 + }, + { + "clip_ratio/high_max": 0.002150004576833453, + "clip_ratio/high_mean": 0.0009839979193202453, + "clip_ratio/low_mean": 0.0006266257241804851, + "clip_ratio/low_min": 3.0013732612133026e-05, + "clip_ratio/region_mean": 0.0016106235707411543, + "epoch": 1.3358600583090379, + "grad_norm": 0.11202864348888397, + "learning_rate": 1e-06, + "loss": -0.0337, + "step": 132 + }, + { + "clip_ratio/high_max": 0.0023354754666797817, + "clip_ratio/high_mean": 0.0010488585885468638, + "clip_ratio/low_mean": 0.0007199145620688796, + "clip_ratio/low_min": 4.286346666049212e-05, + "clip_ratio/region_mean": 0.0017687731706246268, + "epoch": 1.3451895043731779, + "grad_norm": 0.1314229518175125, + "learning_rate": 1e-06, + "loss": -0.0123, + "step": 133 + }, + { + "clip_ratio/high_max": 0.002094859824865125, + "clip_ratio/high_mean": 0.0009514483353996184, + "clip_ratio/low_mean": 0.0009187509349430911, + "clip_ratio/low_min": 7.496398575312924e-05, + "clip_ratio/region_mean": 0.0018701992739806883, + "epoch": 1.3545189504373178, + "grad_norm": 0.13386139273643494, + "learning_rate": 1e-06, + "loss": 0.0007, + "step": 134 + }, + { + "clip_ratio/high_max": 0.0017033482108672615, + "clip_ratio/high_mean": 0.0007622610100952443, + "clip_ratio/low_mean": 0.0007751339417154668, + "clip_ratio/low_min": 8.541487386537483e-05, + "clip_ratio/region_mean": 0.0015373949499917217, + "epoch": 1.3638483965014578, + "grad_norm": 0.1234249547123909, + "learning_rate": 1e-06, + "loss": 0.0476, + "step": 135 + }, + { + "clip_ratio/high_max": 0.0021852707795915194, + "clip_ratio/high_mean": 0.0010210138134425506, + "clip_ratio/low_mean": 0.0007802446561981924, + "clip_ratio/low_min": 2.5228006052202545e-05, + "clip_ratio/region_mean": 0.001801258469640743, + "epoch": 1.3731778425655976, + "grad_norm": 0.12055256217718124, + "learning_rate": 1e-06, + "loss": -0.015, + "step": 136 + }, + { + "clip_ratio/high_max": 0.0022463522109319456, + "clip_ratio/high_mean": 0.0009425262169315829, + "clip_ratio/low_mean": 0.0009136901262536412, + "clip_ratio/low_min": 8.54077770782169e-05, + "clip_ratio/region_mean": 0.001856216367741581, + "epoch": 1.3825072886297376, + "grad_norm": 0.13432520627975464, + "learning_rate": 1e-06, + "loss": 0.0201, + "step": 137 + }, + { + "clip_ratio/high_max": 0.0023435970360878855, + "clip_ratio/high_mean": 0.0009898163880279753, + "clip_ratio/low_mean": 0.0008015044604690047, + "clip_ratio/low_min": 1.6099947970360518e-05, + "clip_ratio/region_mean": 0.0017913208430400118, + "epoch": 1.3918367346938776, + "grad_norm": 0.1121196523308754, + "learning_rate": 1e-06, + "loss": 0.0019, + "step": 138 + }, + { + "clip_ratio/high_max": 0.002259192791825626, + "clip_ratio/high_mean": 0.0009618518561183009, + "clip_ratio/low_mean": 0.0008269448953797109, + "clip_ratio/low_min": 0.00010682068659662036, + "clip_ratio/region_mean": 0.001788796769687906, + "epoch": 1.4011661807580174, + "grad_norm": 0.11160842329263687, + "learning_rate": 1e-06, + "loss": -0.0077, + "step": 139 + }, + { + "clip_ratio/high_max": 0.001967262251127977, + "clip_ratio/high_mean": 0.0009652902335801627, + "clip_ratio/low_mean": 0.000870268569997279, + "clip_ratio/low_min": 7.185075901361415e-05, + "clip_ratio/region_mean": 0.001835558796301484, + "epoch": 1.4104956268221573, + "grad_norm": 0.12078763544559479, + "learning_rate": 1e-06, + "loss": 0.0019, + "step": 140 + }, + { + "clip_ratio/high_max": 0.0021865140297450125, + "clip_ratio/high_mean": 0.0008542850664525758, + "clip_ratio/low_mean": 0.0009709040750749409, + "clip_ratio/low_min": 7.92704704508651e-05, + "clip_ratio/region_mean": 0.0018251891888212413, + "epoch": 1.4198250728862973, + "grad_norm": 0.12320893257856369, + "learning_rate": 1e-06, + "loss": 0.0114, + "step": 141 + }, + { + "clip_ratio/high_max": 0.001995073282159865, + "clip_ratio/high_mean": 0.0009057185598067008, + "clip_ratio/low_mean": 0.0009829190021264367, + "clip_ratio/low_min": 0.00010751451736723538, + "clip_ratio/region_mean": 0.0018886375473812222, + "epoch": 1.4291545189504373, + "grad_norm": 0.1212090477347374, + "learning_rate": 1e-06, + "loss": 0.0207, + "step": 142 + }, + { + "clip_ratio/high_max": 0.002361187922360841, + "clip_ratio/high_mean": 0.000939879784709774, + "clip_ratio/low_mean": 0.0010385727482571383, + "clip_ratio/low_min": 0.00013409857638180256, + "clip_ratio/region_mean": 0.001978452506591566, + "epoch": 1.4384839650145773, + "grad_norm": 0.1244337186217308, + "learning_rate": 1e-06, + "loss": 0.0468, + "step": 143 + }, + { + "clip_ratio/high_max": 0.002408552565611899, + "clip_ratio/high_mean": 0.0010358501604059711, + "clip_ratio/low_mean": 0.0010924177477136254, + "clip_ratio/low_min": 0.00013807321647618664, + "clip_ratio/region_mean": 0.0021282679299474694, + "epoch": 1.4478134110787173, + "grad_norm": 0.1246391236782074, + "learning_rate": 1e-06, + "loss": -0.0085, + "step": 144 + }, + { + "clip_ratio/high_max": 0.0023346201924141496, + "clip_ratio/high_mean": 0.0009564228494127747, + "clip_ratio/low_mean": 0.0009136104872595752, + "clip_ratio/low_min": 7.062887016218156e-05, + "clip_ratio/region_mean": 0.0018700333093875088, + "epoch": 1.457142857142857, + "grad_norm": 0.1140151098370552, + "learning_rate": 1e-06, + "loss": 0.0227, + "step": 145 + }, + { + "clip_ratio/high_max": 0.0025495362642686814, + "clip_ratio/high_mean": 0.0009596319250704255, + "clip_ratio/low_mean": 0.0009141252467088634, + "clip_ratio/low_min": 1.5096617971721571e-05, + "clip_ratio/region_mean": 0.0018737571299425326, + "epoch": 1.466472303206997, + "grad_norm": 0.12359322607517242, + "learning_rate": 1e-06, + "loss": 0.0032, + "step": 146 + }, + { + "clip_ratio/high_max": 0.002412621382973157, + "clip_ratio/high_mean": 0.0009882448630378349, + "clip_ratio/low_mean": 0.0010921952780336142, + "clip_ratio/low_min": 0.0001224439729412552, + "clip_ratio/region_mean": 0.0020804400628549047, + "epoch": 1.475801749271137, + "grad_norm": 0.11045132577419281, + "learning_rate": 1e-06, + "loss": -0.0131, + "step": 147 + }, + { + "clip_ratio/high_max": 0.0023295222272281535, + "clip_ratio/high_mean": 0.0009796738995646592, + "clip_ratio/low_mean": 0.0009771978675416904, + "clip_ratio/low_min": 6.716169991705101e-05, + "clip_ratio/region_mean": 0.0019568717543734238, + "epoch": 1.485131195335277, + "grad_norm": 0.11553800851106644, + "learning_rate": 1e-06, + "loss": 0.0044, + "step": 148 + }, + { + "clip_ratio/high_max": 0.002373781768255867, + "clip_ratio/high_mean": 0.0009506058595434297, + "clip_ratio/low_mean": 0.0011260525534453336, + "clip_ratio/low_min": 0.00017674227183306357, + "clip_ratio/region_mean": 0.0020766584493685514, + "epoch": 1.4944606413994168, + "grad_norm": 0.11694473773241043, + "learning_rate": 1e-06, + "loss": 0.0176, + "step": 149 + }, + { + "clip_ratio/high_max": 0.0024190633703256026, + "clip_ratio/high_mean": 0.0009742487691255519, + "clip_ratio/low_mean": 0.0010879485234909225, + "clip_ratio/low_min": 0.0001114521337512997, + "clip_ratio/region_mean": 0.0020621973380912095, + "epoch": 1.5037900874635568, + "grad_norm": 0.11433441936969757, + "learning_rate": 1e-06, + "loss": -0.008, + "step": 150 + }, + { + "clip_ratio/high_max": 0.0023994471193873324, + "clip_ratio/high_mean": 0.0010328274074709043, + "clip_ratio/low_mean": 0.0011116435671283398, + "clip_ratio/low_min": 7.758178071526345e-05, + "clip_ratio/region_mean": 0.0021444709709612653, + "epoch": 1.5131195335276968, + "grad_norm": 0.11905031651258469, + "learning_rate": 1e-06, + "loss": -0.0183, + "step": 151 + }, + { + "clip_ratio/high_max": 0.002360008511459455, + "clip_ratio/high_mean": 0.001005455069389427, + "clip_ratio/low_mean": 0.0013339525867195334, + "clip_ratio/low_min": 0.0002080884496535873, + "clip_ratio/region_mean": 0.0023394076415570453, + "epoch": 1.5224489795918368, + "grad_norm": 0.1792532503604889, + "learning_rate": 1e-06, + "loss": 0.003, + "step": 152 + }, + { + "clip_ratio/high_max": 0.002323104050447, + "clip_ratio/high_mean": 0.0010053261012217263, + "clip_ratio/low_mean": 0.0010526574060349958, + "clip_ratio/low_min": 5.6324959587072954e-05, + "clip_ratio/region_mean": 0.002057983489066828, + "epoch": 1.5317784256559768, + "grad_norm": 0.12376930564641953, + "learning_rate": 1e-06, + "loss": 0.0337, + "step": 153 + }, + { + "clip_ratio/high_max": 0.002311307114723604, + "clip_ratio/high_mean": 0.0009813068609219044, + "clip_ratio/low_mean": 0.0011149177771585528, + "clip_ratio/low_min": 0.00012089210304111475, + "clip_ratio/region_mean": 0.002096224641718436, + "epoch": 1.5411078717201168, + "grad_norm": 0.12247707694768906, + "learning_rate": 1e-06, + "loss": 0.0177, + "step": 154 + }, + { + "clip_ratio/high_max": 0.0023144585284171626, + "clip_ratio/high_mean": 0.0010975206787406933, + "clip_ratio/low_mean": 0.001081908972992096, + "clip_ratio/low_min": 0.00012427501314959954, + "clip_ratio/region_mean": 0.0021794296917505562, + "epoch": 1.5504373177842565, + "grad_norm": 0.1175316721200943, + "learning_rate": 1e-06, + "loss": -0.0327, + "step": 155 + }, + { + "clip_ratio/high_max": 0.0021606523951049894, + "clip_ratio/high_mean": 0.0009900249970087316, + "clip_ratio/low_mean": 0.001106171705032466, + "clip_ratio/low_min": 8.075089408521308e-05, + "clip_ratio/region_mean": 0.002096196672937367, + "epoch": 1.5597667638483965, + "grad_norm": 0.12338127195835114, + "learning_rate": 1e-06, + "loss": -0.0043, + "step": 156 + }, + { + "clip_ratio/high_max": 0.0019886453155777417, + "clip_ratio/high_mean": 0.0008289978686661925, + "clip_ratio/low_mean": 0.0014164040258037858, + "clip_ratio/low_min": 0.00022766150232200744, + "clip_ratio/region_mean": 0.002245401898107957, + "epoch": 1.5690962099125363, + "grad_norm": 0.12142599374055862, + "learning_rate": 1e-06, + "loss": 0.0823, + "step": 157 + }, + { + "clip_ratio/high_max": 0.002212329964095261, + "clip_ratio/high_mean": 0.0010394248220109148, + "clip_ratio/low_mean": 0.0010776175986393355, + "clip_ratio/low_min": 4.852474376093596e-05, + "clip_ratio/region_mean": 0.002117042415193282, + "epoch": 1.5784256559766763, + "grad_norm": 0.10937978327274323, + "learning_rate": 1e-06, + "loss": -0.0099, + "step": 158 + }, + { + "clip_ratio/high_max": 0.0022363809766829945, + "clip_ratio/high_mean": 0.0009165700976154767, + "clip_ratio/low_mean": 0.001409736189089017, + "clip_ratio/low_min": 0.00010802586075442377, + "clip_ratio/region_mean": 0.002326306239410769, + "epoch": 1.5877551020408163, + "grad_norm": 0.12906044721603394, + "learning_rate": 1e-06, + "loss": 0.0243, + "step": 159 + }, + { + "clip_ratio/high_max": 0.0022373147803591564, + "clip_ratio/high_mean": 0.0009547759400447831, + "clip_ratio/low_mean": 0.001126741235566442, + "clip_ratio/low_min": 5.19391751367948e-05, + "clip_ratio/region_mean": 0.0020815171737922356, + "epoch": 1.5970845481049563, + "grad_norm": 0.11226746439933777, + "learning_rate": 1e-06, + "loss": -0.0214, + "step": 160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0193568638392857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4089.0, + "completions/mean_length": 630.4431762695312, + "completions/mean_terminated_length": 562.0366821289062, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 1.6064139941690962, + "grad_norm": 0.13052918016910553, + "learning_rate": 1e-06, + "loss": 0.0433, + "num_tokens": 111551057.0, + "reward": 0.559884250164032, + "reward_std": 0.2066090852022171, + "rewards/simpleverify_reward/mean": 0.5598841905593872, + "rewards/simpleverify_reward/std": 0.4964096248149872, + "step": 161 + }, + { + "clip_ratio/high_max": 0.0019473583088256419, + "clip_ratio/high_mean": 0.0007786882524669636, + "clip_ratio/low_mean": 0.0006195795758685563, + "clip_ratio/low_min": 3.413532340346137e-05, + "clip_ratio/region_mean": 0.0013982678392494563, + "epoch": 1.6157434402332362, + "grad_norm": 0.11692752689123154, + "learning_rate": 1e-06, + "loss": 0.0253, + "step": 162 + }, + { + "clip_ratio/high_max": 0.001971633617358748, + "clip_ratio/high_mean": 0.0008421267611993244, + "clip_ratio/low_mean": 0.000644899269900634, + "clip_ratio/low_min": 3.6869254472549073e-05, + "clip_ratio/region_mean": 0.0014870260274619795, + "epoch": 1.6250728862973762, + "grad_norm": 0.10882455110549927, + "learning_rate": 1e-06, + "loss": 0.0236, + "step": 163 + }, + { + "clip_ratio/high_max": 0.0020929321253788657, + "clip_ratio/high_mean": 0.0009205508504237514, + "clip_ratio/low_mean": 0.0005239095698925667, + "clip_ratio/low_min": 8.321128916577436e-06, + "clip_ratio/region_mean": 0.0014444604312302545, + "epoch": 1.634402332361516, + "grad_norm": 0.11553318798542023, + "learning_rate": 1e-06, + "loss": -0.0412, + "step": 164 + }, + { + "clip_ratio/high_max": 0.00216324757275288, + "clip_ratio/high_mean": 0.0009188975582219427, + "clip_ratio/low_mean": 0.0006150362187327119, + "clip_ratio/low_min": 8.641618114779703e-05, + "clip_ratio/region_mean": 0.0015339337915065698, + "epoch": 1.643731778425656, + "grad_norm": 0.1298108547925949, + "learning_rate": 1e-06, + "loss": 0.0091, + "step": 165 + }, + { + "clip_ratio/high_max": 0.0020830659559578635, + "clip_ratio/high_mean": 0.0008959771093941526, + "clip_ratio/low_mean": 0.0008042422614380484, + "clip_ratio/low_min": 0.00014302170984592522, + "clip_ratio/region_mean": 0.001700219352642307, + "epoch": 1.6530612244897958, + "grad_norm": 0.12637987732887268, + "learning_rate": 1e-06, + "loss": 0.0232, + "step": 166 + }, + { + "clip_ratio/high_max": 0.00219865715916967, + "clip_ratio/high_mean": 0.0009272287534258794, + "clip_ratio/low_mean": 0.0006882043526275083, + "clip_ratio/low_min": 2.6838769372261595e-05, + "clip_ratio/region_mean": 0.0016154331242432818, + "epoch": 1.6623906705539357, + "grad_norm": 0.14283263683319092, + "learning_rate": 1e-06, + "loss": 0.0111, + "step": 167 + }, + { + "clip_ratio/high_max": 0.0024597456213086843, + "clip_ratio/high_mean": 0.0010091853146150243, + "clip_ratio/low_mean": 0.0007544972431787755, + "clip_ratio/low_min": 2.8480291803134605e-05, + "clip_ratio/region_mean": 0.0017636826160014607, + "epoch": 1.6717201166180757, + "grad_norm": 0.11524340510368347, + "learning_rate": 1e-06, + "loss": 0.01, + "step": 168 + }, + { + "clip_ratio/high_max": 0.002631057708640583, + "clip_ratio/high_mean": 0.001049726277415175, + "clip_ratio/low_mean": 0.0007909286287031136, + "clip_ratio/low_min": 9.947924900188809e-05, + "clip_ratio/region_mean": 0.001840654898842331, + "epoch": 1.6810495626822157, + "grad_norm": 0.11886724829673767, + "learning_rate": 1e-06, + "loss": -0.0221, + "step": 169 + }, + { + "clip_ratio/high_max": 0.0023531218539574184, + "clip_ratio/high_mean": 0.0008936539852584247, + "clip_ratio/low_mean": 0.0009439257082703989, + "clip_ratio/low_min": 0.00010847598605323583, + "clip_ratio/region_mean": 0.0018375797080807388, + "epoch": 1.6903790087463557, + "grad_norm": 0.1280161738395691, + "learning_rate": 1e-06, + "loss": 0.0174, + "step": 170 + }, + { + "clip_ratio/high_max": 0.0024234568045358174, + "clip_ratio/high_mean": 0.0009716022650536615, + "clip_ratio/low_mean": 0.0008536897275916999, + "clip_ratio/low_min": 7.567417560494505e-05, + "clip_ratio/region_mean": 0.0018252919617225416, + "epoch": 1.6997084548104957, + "grad_norm": 0.11074575036764145, + "learning_rate": 1e-06, + "loss": 0.0065, + "step": 171 + }, + { + "clip_ratio/high_max": 0.002372522772930097, + "clip_ratio/high_mean": 0.000934289766519214, + "clip_ratio/low_mean": 0.0010841503899428062, + "clip_ratio/low_min": 0.000123855254969385, + "clip_ratio/region_mean": 0.002018440172832925, + "epoch": 1.7090379008746357, + "grad_norm": 0.11330542713403702, + "learning_rate": 1e-06, + "loss": 0.0198, + "step": 172 + }, + { + "clip_ratio/high_max": 0.002437242215819424, + "clip_ratio/high_mean": 0.0010677119189494988, + "clip_ratio/low_mean": 0.0009012334085127804, + "clip_ratio/low_min": 0.00010626952553138835, + "clip_ratio/region_mean": 0.0019689453547471203, + "epoch": 1.7183673469387755, + "grad_norm": 0.12956194579601288, + "learning_rate": 1e-06, + "loss": -0.0177, + "step": 173 + }, + { + "clip_ratio/high_max": 0.002794365653244313, + "clip_ratio/high_mean": 0.001146391859947471, + "clip_ratio/low_mean": 0.0009963155862351414, + "clip_ratio/low_min": 0.00014578002082998864, + "clip_ratio/region_mean": 0.002142707380698994, + "epoch": 1.7276967930029155, + "grad_norm": 0.1224517673254013, + "learning_rate": 1e-06, + "loss": -0.005, + "step": 174 + }, + { + "clip_ratio/high_max": 0.0023935651261126623, + "clip_ratio/high_mean": 0.0010778728283185046, + "clip_ratio/low_mean": 0.0008526510118826991, + "clip_ratio/low_min": 4.828855071536964e-05, + "clip_ratio/region_mean": 0.0019305238674860448, + "epoch": 1.7370262390670554, + "grad_norm": 0.11898007243871689, + "learning_rate": 1e-06, + "loss": -0.0189, + "step": 175 + }, + { + "clip_ratio/high_max": 0.0022502077554236166, + "clip_ratio/high_mean": 0.0009356172595289536, + "clip_ratio/low_mean": 0.000968181702774018, + "clip_ratio/low_min": 0.00015928049015201395, + "clip_ratio/region_mean": 0.0019037989986827597, + "epoch": 1.7463556851311952, + "grad_norm": 0.13075068593025208, + "learning_rate": 1e-06, + "loss": -0.0001, + "step": 176 + }, + { + "clip_ratio/high_max": 0.002857508196029812, + "clip_ratio/high_mean": 0.0010823418851941824, + "clip_ratio/low_mean": 0.0009438116758246906, + "clip_ratio/low_min": 7.394163912977092e-05, + "clip_ratio/region_mean": 0.002026153582846746, + "epoch": 1.7556851311953352, + "grad_norm": 0.1229390874505043, + "learning_rate": 1e-06, + "loss": -0.0202, + "step": 177 + }, + { + "clip_ratio/high_max": 0.002277478364703711, + "clip_ratio/high_mean": 0.000946376841966412, + "clip_ratio/low_mean": 0.0009406296576344175, + "clip_ratio/low_min": 8.676519428263418e-05, + "clip_ratio/region_mean": 0.0018870064959628507, + "epoch": 1.7650145772594752, + "grad_norm": 0.13334843516349792, + "learning_rate": 1e-06, + "loss": 0.0081, + "step": 178 + }, + { + "clip_ratio/high_max": 0.002178639391786419, + "clip_ratio/high_mean": 0.0009134401789197, + "clip_ratio/low_mean": 0.0009079107312572887, + "clip_ratio/low_min": 0.0001339919417659985, + "clip_ratio/region_mean": 0.0018213508883491158, + "epoch": 1.7743440233236152, + "grad_norm": 0.11784674227237701, + "learning_rate": 1e-06, + "loss": 0.0013, + "step": 179 + }, + { + "clip_ratio/high_max": 0.0022050546249374747, + "clip_ratio/high_mean": 0.0009802271133594331, + "clip_ratio/low_mean": 0.0009644347010180354, + "clip_ratio/low_min": 5.919880186411319e-05, + "clip_ratio/region_mean": 0.001944661773450207, + "epoch": 1.7836734693877552, + "grad_norm": 0.12492590397596359, + "learning_rate": 1e-06, + "loss": -0.0113, + "step": 180 + }, + { + "clip_ratio/high_max": 0.002152851040591486, + "clip_ratio/high_mean": 0.0009666321675467771, + "clip_ratio/low_mean": 0.001131426943175029, + "clip_ratio/low_min": 7.19606687198393e-05, + "clip_ratio/region_mean": 0.0020980590998078696, + "epoch": 1.7930029154518952, + "grad_norm": 0.12422338128089905, + "learning_rate": 1e-06, + "loss": 0.0284, + "step": 181 + }, + { + "clip_ratio/high_max": 0.0021346145804272965, + "clip_ratio/high_mean": 0.0010018986722570844, + "clip_ratio/low_mean": 0.0009776542992767645, + "clip_ratio/low_min": 8.275802974822e-05, + "clip_ratio/region_mean": 0.0019795529296970926, + "epoch": 1.802332361516035, + "grad_norm": 0.12853233516216278, + "learning_rate": 1e-06, + "loss": -0.0186, + "step": 182 + }, + { + "clip_ratio/high_max": 0.002379450190346688, + "clip_ratio/high_mean": 0.0010064417983812746, + "clip_ratio/low_mean": 0.0010600036548567005, + "clip_ratio/low_min": 7.705206371610984e-05, + "clip_ratio/region_mean": 0.0020664455150836147, + "epoch": 1.811661807580175, + "grad_norm": 0.12457560747861862, + "learning_rate": 1e-06, + "loss": 0.0193, + "step": 183 + }, + { + "clip_ratio/high_max": 0.0025077843893086538, + "clip_ratio/high_mean": 0.0011306777996651363, + "clip_ratio/low_mean": 0.001088579920178745, + "clip_ratio/low_min": 8.311457168019842e-05, + "clip_ratio/region_mean": 0.0022192577525856905, + "epoch": 1.820991253644315, + "grad_norm": 0.11980686336755753, + "learning_rate": 1e-06, + "loss": -0.021, + "step": 184 + }, + { + "clip_ratio/high_max": 0.002225047763204202, + "clip_ratio/high_mean": 0.0008986193679447751, + "clip_ratio/low_mean": 0.0010928749852610053, + "clip_ratio/low_min": 0.00013986617159389425, + "clip_ratio/region_mean": 0.001991494413232431, + "epoch": 1.8303206997084547, + "grad_norm": 0.1230274960398674, + "learning_rate": 1e-06, + "loss": 0.0392, + "step": 185 + }, + { + "clip_ratio/high_max": 0.002219788177171722, + "clip_ratio/high_mean": 0.0009121713774220552, + "clip_ratio/low_mean": 0.0012891650330857374, + "clip_ratio/low_min": 0.00010991991166520165, + "clip_ratio/region_mean": 0.0022013364141457714, + "epoch": 1.8396501457725947, + "grad_norm": 0.1255647838115692, + "learning_rate": 1e-06, + "loss": 0.0514, + "step": 186 + }, + { + "clip_ratio/high_max": 0.0024172653138521127, + "clip_ratio/high_mean": 0.001157182727183681, + "clip_ratio/low_mean": 0.0009896736228256486, + "clip_ratio/low_min": 3.2526670111110434e-05, + "clip_ratio/region_mean": 0.0021468563718372025, + "epoch": 1.8489795918367347, + "grad_norm": 0.14806382358074188, + "learning_rate": 1e-06, + "loss": -0.0618, + "step": 187 + }, + { + "clip_ratio/high_max": 0.00214373854760197, + "clip_ratio/high_mean": 0.0009269417587347561, + "clip_ratio/low_mean": 0.001136309394496493, + "clip_ratio/low_min": 0.0001882109063444659, + "clip_ratio/region_mean": 0.00206325115141226, + "epoch": 1.8583090379008746, + "grad_norm": 0.1317405104637146, + "learning_rate": 1e-06, + "loss": 0.0117, + "step": 188 + }, + { + "clip_ratio/high_max": 0.0022073624495533295, + "clip_ratio/high_mean": 0.000885869047124288, + "clip_ratio/low_mean": 0.0012851926330768038, + "clip_ratio/low_min": 0.00021439420379465446, + "clip_ratio/region_mean": 0.0021710616783821024, + "epoch": 1.8676384839650146, + "grad_norm": 0.12194634228944778, + "learning_rate": 1e-06, + "loss": 0.0226, + "step": 189 + }, + { + "clip_ratio/high_max": 0.002016449892835226, + "clip_ratio/high_mean": 0.000939275791097316, + "clip_ratio/low_mean": 0.0010900188663072186, + "clip_ratio/low_min": 0.00014371005454449914, + "clip_ratio/region_mean": 0.0020292946574045345, + "epoch": 1.8769679300291546, + "grad_norm": 0.1152808740735054, + "learning_rate": 1e-06, + "loss": -0.0167, + "step": 190 + }, + { + "clip_ratio/high_max": 0.002209578982728999, + "clip_ratio/high_mean": 0.0010171312333113747, + "clip_ratio/low_mean": 0.001362909661111189, + "clip_ratio/low_min": 0.00021611467036564136, + "clip_ratio/region_mean": 0.002380040859861765, + "epoch": 1.8862973760932946, + "grad_norm": 0.13314864039421082, + "learning_rate": 1e-06, + "loss": 0.0363, + "step": 191 + }, + { + "clip_ratio/high_max": 0.002335728539037518, + "clip_ratio/high_mean": 0.001085810299628065, + "clip_ratio/low_mean": 0.0011010053312929813, + "clip_ratio/low_min": 0.00011865987471537665, + "clip_ratio/region_mean": 0.0021868156763957813, + "epoch": 1.8956268221574344, + "grad_norm": 0.12692096829414368, + "learning_rate": 1e-06, + "loss": -0.027, + "step": 192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.019810267857142905, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4025.0, + "completions/mean_length": 630.1661376953125, + "completions/mean_terminated_length": 560.119384765625, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "epoch": 2.00932944606414, + "grad_norm": 0.1092682033777237, + "learning_rate": 1e-06, + "loss": -0.0044, + "num_tokens": 130140299.0, + "reward": 0.579659640789032, + "reward_std": 0.19633613526821136, + "rewards/simpleverify_reward/mean": 0.5796595811843872, + "rewards/simpleverify_reward/std": 0.49362218379974365, + "step": 193 + }, + { + "clip_ratio/high_max": 0.0020895358466077596, + "clip_ratio/high_mean": 0.0008441342015430564, + "clip_ratio/low_mean": 0.0006324653386400314, + "clip_ratio/low_min": 3.0290034374047536e-05, + "clip_ratio/region_mean": 0.0014765995219931938, + "epoch": 2.01865889212828, + "grad_norm": 0.1230829507112503, + "learning_rate": 1e-06, + "loss": -0.0328, + "step": 194 + }, + { + "clip_ratio/high_max": 0.00207390798459528, + "clip_ratio/high_mean": 0.0008967022058641305, + "clip_ratio/low_mean": 0.0005146934408912784, + "clip_ratio/low_min": 1.3510591998056043e-05, + "clip_ratio/region_mean": 0.001411395671311766, + "epoch": 2.02798833819242, + "grad_norm": 0.1321178525686264, + "learning_rate": 1e-06, + "loss": -0.0412, + "step": 195 + }, + { + "clip_ratio/high_max": 0.0021295985134202056, + "clip_ratio/high_mean": 0.0009101082832785323, + "clip_ratio/low_mean": 0.0006119706013123505, + "clip_ratio/low_min": 3.116128482361091e-05, + "clip_ratio/region_mean": 0.0015220788918668404, + "epoch": 2.03731778425656, + "grad_norm": 0.13616017997264862, + "learning_rate": 1e-06, + "loss": 0.032, + "step": 196 + }, + { + "clip_ratio/high_max": 0.002133457033778541, + "clip_ratio/high_mean": 0.0008904576734494185, + "clip_ratio/low_mean": 0.0007426326465065358, + "clip_ratio/low_min": 5.287952535582008e-05, + "clip_ratio/region_mean": 0.0016330903490597848, + "epoch": 2.0466472303206995, + "grad_norm": 0.12524496018886566, + "learning_rate": 1e-06, + "loss": -0.0134, + "step": 197 + }, + { + "clip_ratio/high_max": 0.0019300127423775848, + "clip_ratio/high_mean": 0.0008277610777440714, + "clip_ratio/low_mean": 0.0005185083236938226, + "clip_ratio/low_min": 2.8496213417383842e-05, + "clip_ratio/region_mean": 0.0013462693896144629, + "epoch": 2.0559766763848395, + "grad_norm": 0.10855976492166519, + "learning_rate": 1e-06, + "loss": -0.0366, + "step": 198 + }, + { + "clip_ratio/high_max": 0.002347043984627817, + "clip_ratio/high_mean": 0.0009639352228987264, + "clip_ratio/low_mean": 0.0006995748717599781, + "clip_ratio/low_min": 9.55956120378687e-05, + "clip_ratio/region_mean": 0.0016635101201245561, + "epoch": 2.0653061224489795, + "grad_norm": 0.12525874376296997, + "learning_rate": 1e-06, + "loss": 0.0054, + "step": 199 + }, + { + "clip_ratio/high_max": 0.0022166262278915383, + "clip_ratio/high_mean": 0.0008933486005844316, + "clip_ratio/low_mean": 0.0008267901648650877, + "clip_ratio/low_min": 2.9530114261433482e-05, + "clip_ratio/region_mean": 0.0017201387527165934, + "epoch": 2.0746355685131195, + "grad_norm": 0.13545803725719452, + "learning_rate": 1e-06, + "loss": 0.0656, + "step": 200 + }, + { + "clip_ratio/high_max": 0.0021380551261245273, + "clip_ratio/high_mean": 0.0008750795986998128, + "clip_ratio/low_mean": 0.0007241222647280665, + "clip_ratio/low_min": 7.284074399649398e-05, + "clip_ratio/region_mean": 0.0015992018743418157, + "epoch": 2.0839650145772595, + "grad_norm": 0.13121257722377777, + "learning_rate": 1e-06, + "loss": 0.0272, + "step": 201 + }, + { + "clip_ratio/high_max": 0.0018240930658066645, + "clip_ratio/high_mean": 0.0008700213911652099, + "clip_ratio/low_mean": 0.0008648789225844666, + "clip_ratio/low_min": 5.121918093209388e-05, + "clip_ratio/region_mean": 0.001734900288283825, + "epoch": 2.0932944606413995, + "grad_norm": 0.12381664663553238, + "learning_rate": 1e-06, + "loss": 0.025, + "step": 202 + }, + { + "clip_ratio/high_max": 0.0019286003152956255, + "clip_ratio/high_mean": 0.0008171215304173529, + "clip_ratio/low_mean": 0.0007734704777249135, + "clip_ratio/low_min": 6.245358144951751e-05, + "clip_ratio/region_mean": 0.0015905919462966267, + "epoch": 2.1026239067055394, + "grad_norm": 0.11860410124063492, + "learning_rate": 1e-06, + "loss": -0.0229, + "step": 203 + }, + { + "clip_ratio/high_max": 0.002014702717133332, + "clip_ratio/high_mean": 0.0008024648914215504, + "clip_ratio/low_mean": 0.0009128984147537267, + "clip_ratio/low_min": 5.4807868764328305e-05, + "clip_ratio/region_mean": 0.0017153633452835493, + "epoch": 2.1119533527696794, + "grad_norm": 0.11724842339754105, + "learning_rate": 1e-06, + "loss": 0.0373, + "step": 204 + }, + { + "clip_ratio/high_max": 0.002323144584806869, + "clip_ratio/high_mean": 0.0009170987123070518, + "clip_ratio/low_mean": 0.0008524646545993164, + "clip_ratio/low_min": 5.6268603657372296e-05, + "clip_ratio/region_mean": 0.0017695634087431245, + "epoch": 2.1212827988338194, + "grad_norm": 0.10747351497411728, + "learning_rate": 1e-06, + "loss": -0.0159, + "step": 205 + }, + { + "clip_ratio/high_max": 0.002367728498938959, + "clip_ratio/high_mean": 0.0009401266124768881, + "clip_ratio/low_mean": 0.0009918116229528096, + "clip_ratio/low_min": 6.329270854621427e-05, + "clip_ratio/region_mean": 0.0019319382408866659, + "epoch": 2.130612244897959, + "grad_norm": 0.11633989214897156, + "learning_rate": 1e-06, + "loss": 0.0026, + "step": 206 + }, + { + "clip_ratio/high_max": 0.002109798457240686, + "clip_ratio/high_mean": 0.0008825089917081641, + "clip_ratio/low_mean": 0.0009929372818078264, + "clip_ratio/low_min": 5.883443282073131e-05, + "clip_ratio/region_mean": 0.0018754462616925593, + "epoch": 2.139941690962099, + "grad_norm": 0.12492082267999649, + "learning_rate": 1e-06, + "loss": -0.0004, + "step": 207 + }, + { + "clip_ratio/high_max": 0.0025152976595563814, + "clip_ratio/high_mean": 0.0010189184904447757, + "clip_ratio/low_mean": 0.0008982785839179996, + "clip_ratio/low_min": 7.195130820036866e-05, + "clip_ratio/region_mean": 0.0019171971071045846, + "epoch": 2.149271137026239, + "grad_norm": 0.1098606139421463, + "learning_rate": 1e-06, + "loss": -0.0135, + "step": 208 + }, + { + "clip_ratio/high_max": 0.0022413926890294533, + "clip_ratio/high_mean": 0.000899045386177022, + "clip_ratio/low_mean": 0.000838117346575018, + "clip_ratio/low_min": 0.0001263984631805215, + "clip_ratio/region_mean": 0.0017371627400279976, + "epoch": 2.158600583090379, + "grad_norm": 0.11806578189134598, + "learning_rate": 1e-06, + "loss": -0.03, + "step": 209 + }, + { + "clip_ratio/high_max": 0.0021019109262852, + "clip_ratio/high_mean": 0.0008178860243788222, + "clip_ratio/low_mean": 0.0009143050228885841, + "clip_ratio/low_min": 3.181555985065643e-05, + "clip_ratio/region_mean": 0.0017321909981546924, + "epoch": 2.167930029154519, + "grad_norm": 0.11653382331132889, + "learning_rate": 1e-06, + "loss": 0.0303, + "step": 210 + }, + { + "clip_ratio/high_max": 0.0023580261040478945, + "clip_ratio/high_mean": 0.0009531390278425533, + "clip_ratio/low_mean": 0.00113502951717237, + "clip_ratio/low_min": 8.734521907172166e-05, + "clip_ratio/region_mean": 0.00208816855592886, + "epoch": 2.177259475218659, + "grad_norm": 0.1219073086977005, + "learning_rate": 1e-06, + "loss": 0.0192, + "step": 211 + }, + { + "clip_ratio/high_max": 0.002315523350262083, + "clip_ratio/high_mean": 0.0009459816174057778, + "clip_ratio/low_mean": 0.0010800387535709888, + "clip_ratio/low_min": 0.00014688047667732462, + "clip_ratio/region_mean": 0.002026020381890703, + "epoch": 2.186588921282799, + "grad_norm": 0.127249076962471, + "learning_rate": 1e-06, + "loss": 0.0073, + "step": 212 + }, + { + "clip_ratio/high_max": 0.0023402224978781305, + "clip_ratio/high_mean": 0.0009386223282490391, + "clip_ratio/low_mean": 0.0011775325474445708, + "clip_ratio/low_min": 7.335049667744897e-05, + "clip_ratio/region_mean": 0.0021161549011594616, + "epoch": 2.195918367346939, + "grad_norm": 0.11784949153661728, + "learning_rate": 1e-06, + "loss": -0.0055, + "step": 213 + }, + { + "clip_ratio/high_max": 0.0022509518203150947, + "clip_ratio/high_mean": 0.0010360320047766436, + "clip_ratio/low_mean": 0.0010909208776865853, + "clip_ratio/low_min": 9.097713518713135e-05, + "clip_ratio/region_mean": 0.002126952924299985, + "epoch": 2.205247813411079, + "grad_norm": 0.12944884598255157, + "learning_rate": 1e-06, + "loss": 0.0245, + "step": 214 + }, + { + "clip_ratio/high_max": 0.002420110467937775, + "clip_ratio/high_mean": 0.0009371957312396262, + "clip_ratio/low_mean": 0.0010524861718295142, + "clip_ratio/low_min": 9.543488522467669e-05, + "clip_ratio/region_mean": 0.0019896818994311616, + "epoch": 2.2145772594752184, + "grad_norm": 0.13038748502731323, + "learning_rate": 1e-06, + "loss": -0.0012, + "step": 215 + }, + { + "clip_ratio/high_max": 0.002297401319083292, + "clip_ratio/high_mean": 0.0009000710924738087, + "clip_ratio/low_mean": 0.0010327846011932706, + "clip_ratio/low_min": 8.079760846158024e-05, + "clip_ratio/region_mean": 0.0019328556300024502, + "epoch": 2.2239067055393584, + "grad_norm": 0.11851400882005692, + "learning_rate": 1e-06, + "loss": 0.0023, + "step": 216 + }, + { + "clip_ratio/high_max": 0.00250308339309413, + "clip_ratio/high_mean": 0.0009396381992701208, + "clip_ratio/low_mean": 0.0010309597673767712, + "clip_ratio/low_min": 3.620628922362812e-05, + "clip_ratio/region_mean": 0.0019705979866557755, + "epoch": 2.2332361516034984, + "grad_norm": 0.1116507425904274, + "learning_rate": 1e-06, + "loss": 0.0264, + "step": 217 + }, + { + "clip_ratio/high_max": 0.0023662418461753987, + "clip_ratio/high_mean": 0.0010013960363721708, + "clip_ratio/low_mean": 0.0010428183886688203, + "clip_ratio/low_min": 2.7021183996112086e-05, + "clip_ratio/region_mean": 0.0020442144668777473, + "epoch": 2.2425655976676384, + "grad_norm": 0.12043320387601852, + "learning_rate": 1e-06, + "loss": 0.0043, + "step": 218 + }, + { + "clip_ratio/high_max": 0.002208399157098029, + "clip_ratio/high_mean": 0.0009531907053315081, + "clip_ratio/low_mean": 0.0010326984884159174, + "clip_ratio/low_min": 4.3137135435245e-05, + "clip_ratio/region_mean": 0.0019858891901094466, + "epoch": 2.2518950437317784, + "grad_norm": 0.11767147481441498, + "learning_rate": 1e-06, + "loss": -0.0127, + "step": 219 + }, + { + "clip_ratio/high_max": 0.0023339143881457858, + "clip_ratio/high_mean": 0.0009989120135287521, + "clip_ratio/low_mean": 0.0012673940327658784, + "clip_ratio/low_min": 8.345290279976325e-05, + "clip_ratio/region_mean": 0.002266306029923726, + "epoch": 2.2612244897959184, + "grad_norm": 0.13508707284927368, + "learning_rate": 1e-06, + "loss": -0.0055, + "step": 220 + }, + { + "clip_ratio/high_max": 0.0024647469690535218, + "clip_ratio/high_mean": 0.0010625124850776047, + "clip_ratio/low_mean": 0.0010962694032059517, + "clip_ratio/low_min": 0.0001967881407836103, + "clip_ratio/region_mean": 0.002158781891921535, + "epoch": 2.2705539358600584, + "grad_norm": 0.13242405652999878, + "learning_rate": 1e-06, + "loss": 0.0069, + "step": 221 + }, + { + "clip_ratio/high_max": 0.002287006995175034, + "clip_ratio/high_mean": 0.0010311212790838908, + "clip_ratio/low_mean": 0.0010112026575370692, + "clip_ratio/low_min": 7.01167264196556e-05, + "clip_ratio/region_mean": 0.002042323954810854, + "epoch": 2.2798833819241984, + "grad_norm": 0.11984199285507202, + "learning_rate": 1e-06, + "loss": -0.0283, + "step": 222 + }, + { + "clip_ratio/high_max": 0.002268641379487235, + "clip_ratio/high_mean": 0.0009501121567154769, + "clip_ratio/low_mean": 0.0011279360769549385, + "clip_ratio/low_min": 9.394236622028984e-05, + "clip_ratio/region_mean": 0.0020780483100679703, + "epoch": 2.2892128279883384, + "grad_norm": 0.11281422525644302, + "learning_rate": 1e-06, + "loss": -0.0141, + "step": 223 + }, + { + "clip_ratio/high_max": 0.0022647486330242828, + "clip_ratio/high_mean": 0.0010632389967213385, + "clip_ratio/low_mean": 0.0011535417233972112, + "clip_ratio/low_min": 5.966096432530321e-05, + "clip_ratio/region_mean": 0.0022167806964716874, + "epoch": 2.298542274052478, + "grad_norm": 0.12871526181697845, + "learning_rate": 1e-06, + "loss": 0.0013, + "step": 224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.019566127232142905, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4087.0, + "completions/mean_length": 630.736328125, + "completions/mean_terminated_length": 561.5814208984375, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 2.307871720116618, + "grad_norm": 0.13329946994781494, + "learning_rate": 1e-06, + "loss": -0.0225, + "num_tokens": 148803354.0, + "reward": 0.5794503688812256, + "reward_std": 0.19710879027843475, + "rewards/simpleverify_reward/mean": 0.5794503092765808, + "rewards/simpleverify_reward/std": 0.49365589022636414, + "step": 225 + }, + { + "clip_ratio/high_max": 0.001931429149408359, + "clip_ratio/high_mean": 0.0008306541731144534, + "clip_ratio/low_mean": 0.0006265867032198003, + "clip_ratio/low_min": 3.5744412343774457e-05, + "clip_ratio/region_mean": 0.0014572408763342537, + "epoch": 2.317201166180758, + "grad_norm": 0.12135004252195358, + "learning_rate": 1e-06, + "loss": 0.0056, + "step": 226 + }, + { + "clip_ratio/high_max": 0.0016768822570156772, + "clip_ratio/high_mean": 0.0008354935489478521, + "clip_ratio/low_mean": 0.0005116177226227592, + "clip_ratio/low_min": 1.498441633884795e-05, + "clip_ratio/region_mean": 0.0013471112761180848, + "epoch": 2.326530612244898, + "grad_norm": 0.11290162801742554, + "learning_rate": 1e-06, + "loss": 0.0062, + "step": 227 + }, + { + "clip_ratio/high_max": 0.0017869251059892122, + "clip_ratio/high_mean": 0.0008068411007116083, + "clip_ratio/low_mean": 0.0006611987846554257, + "clip_ratio/low_min": 4.146084211242851e-05, + "clip_ratio/region_mean": 0.0014680398926429916, + "epoch": 2.335860058309038, + "grad_norm": 0.13063408434391022, + "learning_rate": 1e-06, + "loss": 0.0431, + "step": 228 + }, + { + "clip_ratio/high_max": 0.0022113891027402133, + "clip_ratio/high_mean": 0.0008729533656151034, + "clip_ratio/low_mean": 0.0006115404485171894, + "clip_ratio/low_min": 2.9467566491803154e-05, + "clip_ratio/region_mean": 0.0014844938414171338, + "epoch": 2.345189504373178, + "grad_norm": 0.13261906802654266, + "learning_rate": 1e-06, + "loss": -0.0004, + "step": 229 + }, + { + "clip_ratio/high_max": 0.0024802484767860733, + "clip_ratio/high_mean": 0.0010233752946078312, + "clip_ratio/low_mean": 0.0005022480663683382, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015256233964464627, + "epoch": 2.354518950437318, + "grad_norm": 0.11898131668567657, + "learning_rate": 1e-06, + "loss": -0.0371, + "step": 230 + }, + { + "clip_ratio/high_max": 0.0023322810884565115, + "clip_ratio/high_mean": 0.0008943115790316369, + "clip_ratio/low_mean": 0.0007600811477459501, + "clip_ratio/low_min": 3.761400239454815e-05, + "clip_ratio/region_mean": 0.0016543927558814175, + "epoch": 2.363848396501458, + "grad_norm": 0.1239127516746521, + "learning_rate": 1e-06, + "loss": 0.0069, + "step": 231 + }, + { + "clip_ratio/high_max": 0.002162837205105461, + "clip_ratio/high_mean": 0.0008315077648148872, + "clip_ratio/low_mean": 0.0007176635408541188, + "clip_ratio/low_min": 3.620522147684824e-05, + "clip_ratio/region_mean": 0.0015491713274968788, + "epoch": 2.373177842565598, + "grad_norm": 0.1227312833070755, + "learning_rate": 1e-06, + "loss": 0.02, + "step": 232 + }, + { + "clip_ratio/high_max": 0.00214016082463786, + "clip_ratio/high_mean": 0.000906252489585313, + "clip_ratio/low_mean": 0.000733572494937107, + "clip_ratio/low_min": 3.462603854131885e-05, + "clip_ratio/region_mean": 0.0016398249645135365, + "epoch": 2.3825072886297374, + "grad_norm": 0.11811984330415726, + "learning_rate": 1e-06, + "loss": 0.0029, + "step": 233 + }, + { + "clip_ratio/high_max": 0.0020093668572371826, + "clip_ratio/high_mean": 0.0008168029089574702, + "clip_ratio/low_mean": 0.0008965912747953553, + "clip_ratio/low_min": 7.428751814586576e-05, + "clip_ratio/region_mean": 0.0017133941582869738, + "epoch": 2.3918367346938774, + "grad_norm": 0.1290924847126007, + "learning_rate": 1e-06, + "loss": 0.039, + "step": 234 + }, + { + "clip_ratio/high_max": 0.001914288724947255, + "clip_ratio/high_mean": 0.0008547640100005083, + "clip_ratio/low_mean": 0.0008121297823890927, + "clip_ratio/low_min": 4.846118827117607e-05, + "clip_ratio/region_mean": 0.0016668937823851593, + "epoch": 2.4011661807580174, + "grad_norm": 0.12953390181064606, + "learning_rate": 1e-06, + "loss": -0.0013, + "step": 235 + }, + { + "clip_ratio/high_max": 0.0024019067714107223, + "clip_ratio/high_mean": 0.0008915145008359104, + "clip_ratio/low_mean": 0.0008578787019359879, + "clip_ratio/low_min": 9.866466643870808e-05, + "clip_ratio/region_mean": 0.0017493932027718984, + "epoch": 2.4104956268221573, + "grad_norm": 0.1239464282989502, + "learning_rate": 1e-06, + "loss": 0.0261, + "step": 236 + }, + { + "clip_ratio/high_max": 0.00219047720747767, + "clip_ratio/high_mean": 0.0009286063177569304, + "clip_ratio/low_mean": 0.00079981864655565, + "clip_ratio/low_min": 4.524950600170996e-05, + "clip_ratio/region_mean": 0.0017284249770455062, + "epoch": 2.4198250728862973, + "grad_norm": 0.12453047186136246, + "learning_rate": 1e-06, + "loss": 0.018, + "step": 237 + }, + { + "clip_ratio/high_max": 0.002245578245492652, + "clip_ratio/high_mean": 0.0010236589005216956, + "clip_ratio/low_mean": 0.0010214083795290207, + "clip_ratio/low_min": 7.412554805341642e-05, + "clip_ratio/region_mean": 0.0020450672527658753, + "epoch": 2.4291545189504373, + "grad_norm": 0.12429088354110718, + "learning_rate": 1e-06, + "loss": 0.0183, + "step": 238 + }, + { + "clip_ratio/high_max": 0.0021250942118058447, + "clip_ratio/high_mean": 0.0009698486210254487, + "clip_ratio/low_mean": 0.0009306999745604116, + "clip_ratio/low_min": 5.981852791592246e-05, + "clip_ratio/region_mean": 0.0019005486101377755, + "epoch": 2.4384839650145773, + "grad_norm": 0.12769249081611633, + "learning_rate": 1e-06, + "loss": 0.0028, + "step": 239 + }, + { + "clip_ratio/high_max": 0.0023410784924635664, + "clip_ratio/high_mean": 0.0010047145915450528, + "clip_ratio/low_mean": 0.0008066290483839111, + "clip_ratio/low_min": 9.084598423214629e-05, + "clip_ratio/region_mean": 0.0018113436599378474, + "epoch": 2.4478134110787173, + "grad_norm": 0.121663399040699, + "learning_rate": 1e-06, + "loss": -0.0246, + "step": 240 + }, + { + "clip_ratio/high_max": 0.0020401714500621893, + "clip_ratio/high_mean": 0.0009104738965106662, + "clip_ratio/low_mean": 0.0009578589706507046, + "clip_ratio/low_min": 5.7134509916068055e-05, + "clip_ratio/region_mean": 0.0018683328744373284, + "epoch": 2.4571428571428573, + "grad_norm": 0.11453356593847275, + "learning_rate": 1e-06, + "loss": 0.0201, + "step": 241 + }, + { + "clip_ratio/high_max": 0.00280039736389881, + "clip_ratio/high_mean": 0.0011022610815416556, + "clip_ratio/low_mean": 0.0010881947564485017, + "clip_ratio/low_min": 0.00011510461445141118, + "clip_ratio/region_mean": 0.002190455881645903, + "epoch": 2.466472303206997, + "grad_norm": 0.11957170069217682, + "learning_rate": 1e-06, + "loss": -0.0131, + "step": 242 + }, + { + "clip_ratio/high_max": 0.002436354843666777, + "clip_ratio/high_mean": 0.0010595270650810562, + "clip_ratio/low_mean": 0.0010193988691753475, + "clip_ratio/low_min": 0.0001645911943342071, + "clip_ratio/region_mean": 0.002078926030662842, + "epoch": 2.4758017492711373, + "grad_norm": 0.12470949441194534, + "learning_rate": 1e-06, + "loss": -0.0172, + "step": 243 + }, + { + "clip_ratio/high_max": 0.001885615049104672, + "clip_ratio/high_mean": 0.0009377739079354797, + "clip_ratio/low_mean": 0.0009602939389878884, + "clip_ratio/low_min": 8.107810572255403e-05, + "clip_ratio/region_mean": 0.0018980678432853892, + "epoch": 2.485131195335277, + "grad_norm": 0.12089991569519043, + "learning_rate": 1e-06, + "loss": -0.0086, + "step": 244 + }, + { + "clip_ratio/high_max": 0.002278977062815102, + "clip_ratio/high_mean": 0.0009759604818100343, + "clip_ratio/low_mean": 0.0009597634143574396, + "clip_ratio/low_min": 1.600102405063808e-05, + "clip_ratio/region_mean": 0.0019357238852535374, + "epoch": 2.494460641399417, + "grad_norm": 0.1121646985411644, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 245 + }, + { + "clip_ratio/high_max": 0.00190876182023203, + "clip_ratio/high_mean": 0.0008337687540915795, + "clip_ratio/low_mean": 0.0010464253391546663, + "clip_ratio/low_min": 0.00016330648304574424, + "clip_ratio/region_mean": 0.001880194067780394, + "epoch": 2.503790087463557, + "grad_norm": 0.11362891644239426, + "learning_rate": 1e-06, + "loss": 0.0079, + "step": 246 + }, + { + "clip_ratio/high_max": 0.001980891709536081, + "clip_ratio/high_mean": 0.0008288653825729853, + "clip_ratio/low_mean": 0.001052274064932135, + "clip_ratio/low_min": 0.00010290669888490811, + "clip_ratio/region_mean": 0.001881139432953205, + "epoch": 2.513119533527697, + "grad_norm": 0.1142832487821579, + "learning_rate": 1e-06, + "loss": 0.0023, + "step": 247 + }, + { + "clip_ratio/high_max": 0.002474515298672486, + "clip_ratio/high_mean": 0.0009529462004138622, + "clip_ratio/low_mean": 0.0013267577760416316, + "clip_ratio/low_min": 0.00015478871137020178, + "clip_ratio/region_mean": 0.0022797039346187375, + "epoch": 2.522448979591837, + "grad_norm": 0.11705332249403, + "learning_rate": 1e-06, + "loss": 0.0166, + "step": 248 + }, + { + "clip_ratio/high_max": 0.0026798795515787788, + "clip_ratio/high_mean": 0.0009887291053018998, + "clip_ratio/low_mean": 0.0010259262526233215, + "clip_ratio/low_min": 1.391052774124546e-05, + "clip_ratio/region_mean": 0.002014655343373306, + "epoch": 2.5317784256559768, + "grad_norm": 0.12924474477767944, + "learning_rate": 1e-06, + "loss": 0.0399, + "step": 249 + }, + { + "clip_ratio/high_max": 0.0022953186598897446, + "clip_ratio/high_mean": 0.001018163280605222, + "clip_ratio/low_mean": 0.0011129522281407844, + "clip_ratio/low_min": 2.8344958991510794e-05, + "clip_ratio/region_mean": 0.00213111552875489, + "epoch": 2.5411078717201168, + "grad_norm": 0.10528191179037094, + "learning_rate": 1e-06, + "loss": 0.0259, + "step": 250 + }, + { + "clip_ratio/high_max": 0.0022079543050494976, + "clip_ratio/high_mean": 0.0010806344398588408, + "clip_ratio/low_mean": 0.0009955247714970028, + "clip_ratio/low_min": 6.985532309045084e-05, + "clip_ratio/region_mean": 0.0020761592386406846, + "epoch": 2.5504373177842563, + "grad_norm": 0.1289607435464859, + "learning_rate": 1e-06, + "loss": -0.0497, + "step": 251 + }, + { + "clip_ratio/high_max": 0.0028587446286110207, + "clip_ratio/high_mean": 0.0011241016181884333, + "clip_ratio/low_mean": 0.0009463640653848415, + "clip_ratio/low_min": 6.59255401842529e-05, + "clip_ratio/region_mean": 0.002070465692668222, + "epoch": 2.5597667638483967, + "grad_norm": 0.12235015630722046, + "learning_rate": 1e-06, + "loss": -0.0364, + "step": 252 + }, + { + "clip_ratio/high_max": 0.0025497422539046966, + "clip_ratio/high_mean": 0.0010962882515741512, + "clip_ratio/low_mean": 0.0012082916218787432, + "clip_ratio/low_min": 0.00020721147757285507, + "clip_ratio/region_mean": 0.002304579902556725, + "epoch": 2.5690962099125363, + "grad_norm": 0.13087229430675507, + "learning_rate": 1e-06, + "loss": -0.0165, + "step": 253 + }, + { + "clip_ratio/high_max": 0.0027529015715117566, + "clip_ratio/high_mean": 0.0011465288989711553, + "clip_ratio/low_mean": 0.0011475638675619848, + "clip_ratio/low_min": 3.068127898586681e-05, + "clip_ratio/region_mean": 0.0022940927956369705, + "epoch": 2.5784256559766763, + "grad_norm": 0.13189953565597534, + "learning_rate": 1e-06, + "loss": -0.0203, + "step": 254 + }, + { + "clip_ratio/high_max": 0.0021623915672535077, + "clip_ratio/high_mean": 0.0009950409239536384, + "clip_ratio/low_mean": 0.001243158587385551, + "clip_ratio/low_min": 0.0001403538899467094, + "clip_ratio/region_mean": 0.002238199580460787, + "epoch": 2.5877551020408163, + "grad_norm": 0.12326417863368988, + "learning_rate": 1e-06, + "loss": 0.0205, + "step": 255 + }, + { + "clip_ratio/high_max": 0.002177543705329299, + "clip_ratio/high_mean": 0.0009701890376163647, + "clip_ratio/low_mean": 0.001118037096603075, + "clip_ratio/low_min": 0.0001399228831360233, + "clip_ratio/region_mean": 0.0020882261014776304, + "epoch": 2.5970845481049563, + "grad_norm": 0.1164150983095169, + "learning_rate": 1e-06, + "loss": 0.0143, + "step": 256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0235072544642857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4093.0, + "completions/mean_length": 645.2140502929688, + "completions/mean_terminated_length": 562.1427612304688, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 2.6064139941690962, + "grad_norm": 0.13600456714630127, + "learning_rate": 1e-06, + "loss": -0.0001, + "num_tokens": 167366571.0, + "reward": 0.5756138563156128, + "reward_std": 0.19903387129306793, + "rewards/simpleverify_reward/mean": 0.5756138563156128, + "rewards/simpleverify_reward/std": 0.4942580759525299, + "step": 257 + }, + { + "clip_ratio/high_max": 0.00189734512969153, + "clip_ratio/high_mean": 0.0008366046840819763, + "clip_ratio/low_mean": 0.0006608965504710795, + "clip_ratio/low_min": 1.5144172721193172e-05, + "clip_ratio/region_mean": 0.0014975012600189075, + "epoch": 2.6157434402332362, + "grad_norm": 0.12501174211502075, + "learning_rate": 1e-06, + "loss": 0.0082, + "step": 258 + }, + { + "clip_ratio/high_max": 0.002123884820321109, + "clip_ratio/high_mean": 0.0008968435904534999, + "clip_ratio/low_mean": 0.0006447752157328068, + "clip_ratio/low_min": 1.1552680007298477e-05, + "clip_ratio/region_mean": 0.001541618796181865, + "epoch": 2.6250728862973762, + "grad_norm": 0.12504319846630096, + "learning_rate": 1e-06, + "loss": 0.0226, + "step": 259 + }, + { + "clip_ratio/high_max": 0.002071463044558186, + "clip_ratio/high_mean": 0.0009713429608382285, + "clip_ratio/low_mean": 0.0006334486652121996, + "clip_ratio/low_min": 4.235206506564282e-05, + "clip_ratio/region_mean": 0.0016047916797106154, + "epoch": 2.6344023323615158, + "grad_norm": 0.11961305141448975, + "learning_rate": 1e-06, + "loss": -0.0031, + "step": 260 + }, + { + "clip_ratio/high_max": 0.002227572003903333, + "clip_ratio/high_mean": 0.0009052047134900931, + "clip_ratio/low_mean": 0.0006609805350308307, + "clip_ratio/low_min": 7.137222746678162e-05, + "clip_ratio/region_mean": 0.0015661852557968814, + "epoch": 2.643731778425656, + "grad_norm": 0.12769760191440582, + "learning_rate": 1e-06, + "loss": 0.0215, + "step": 261 + }, + { + "clip_ratio/high_max": 0.0023088831221684813, + "clip_ratio/high_mean": 0.0009140516522165854, + "clip_ratio/low_mean": 0.0007019542936177459, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001616005931282416, + "epoch": 2.6530612244897958, + "grad_norm": 0.1286620795726776, + "learning_rate": 1e-06, + "loss": -0.022, + "step": 262 + }, + { + "clip_ratio/high_max": 0.0018435072524880525, + "clip_ratio/high_mean": 0.0008288394874398364, + "clip_ratio/low_mean": 0.0006485298545158003, + "clip_ratio/low_min": 2.039151695498731e-05, + "clip_ratio/region_mean": 0.0014773693328606896, + "epoch": 2.6623906705539357, + "grad_norm": 0.12441161274909973, + "learning_rate": 1e-06, + "loss": -0.008, + "step": 263 + }, + { + "clip_ratio/high_max": 0.0023731261062494013, + "clip_ratio/high_mean": 0.0009713482832012232, + "clip_ratio/low_mean": 0.0007894066784501774, + "clip_ratio/low_min": 1.1546277164597996e-05, + "clip_ratio/region_mean": 0.001760754981660284, + "epoch": 2.6717201166180757, + "grad_norm": 0.1379632204771042, + "learning_rate": 1e-06, + "loss": 0.0174, + "step": 264 + }, + { + "clip_ratio/high_max": 0.0024549766967538744, + "clip_ratio/high_mean": 0.0009812457210500725, + "clip_ratio/low_mean": 0.0007618412128067575, + "clip_ratio/low_min": 3.2005257708078716e-05, + "clip_ratio/region_mean": 0.0017430869120289572, + "epoch": 2.6810495626822157, + "grad_norm": 0.12083766609430313, + "learning_rate": 1e-06, + "loss": -0.0256, + "step": 265 + }, + { + "clip_ratio/high_max": 0.0020812658258364536, + "clip_ratio/high_mean": 0.0009935599227901548, + "clip_ratio/low_mean": 0.0007724324095761403, + "clip_ratio/low_min": 3.0486315154121257e-05, + "clip_ratio/region_mean": 0.0017659923323662952, + "epoch": 2.6903790087463557, + "grad_norm": 0.12071080505847931, + "learning_rate": 1e-06, + "loss": -0.0133, + "step": 266 + }, + { + "clip_ratio/high_max": 0.002290940439706901, + "clip_ratio/high_mean": 0.0008696704117028276, + "clip_ratio/low_mean": 0.0008006323096196866, + "clip_ratio/low_min": 3.3404811802029144e-05, + "clip_ratio/region_mean": 0.0016703027649782598, + "epoch": 2.6997084548104957, + "grad_norm": 0.12483668327331543, + "learning_rate": 1e-06, + "loss": 0.0078, + "step": 267 + }, + { + "clip_ratio/high_max": 0.0021949295878584962, + "clip_ratio/high_mean": 0.0010906782563324668, + "clip_ratio/low_mean": 0.0006446836814575363, + "clip_ratio/low_min": 2.8325403036433272e-05, + "clip_ratio/region_mean": 0.0017353619332425296, + "epoch": 2.7090379008746357, + "grad_norm": 0.11658875644207001, + "learning_rate": 1e-06, + "loss": -0.0868, + "step": 268 + }, + { + "clip_ratio/high_max": 0.0025025291033671238, + "clip_ratio/high_mean": 0.0010405593529867474, + "clip_ratio/low_mean": 0.0009900394070427865, + "clip_ratio/low_min": 9.010043140733615e-05, + "clip_ratio/region_mean": 0.0020305987345636822, + "epoch": 2.7183673469387752, + "grad_norm": 0.1365434229373932, + "learning_rate": 1e-06, + "loss": -0.0118, + "step": 269 + }, + { + "clip_ratio/high_max": 0.0020535689436655957, + "clip_ratio/high_mean": 0.0007929516432341188, + "clip_ratio/low_mean": 0.0009509364226687467, + "clip_ratio/low_min": 0.00014552040920534637, + "clip_ratio/region_mean": 0.0017438880313420668, + "epoch": 2.7276967930029157, + "grad_norm": 0.11683172732591629, + "learning_rate": 1e-06, + "loss": 0.0009, + "step": 270 + }, + { + "clip_ratio/high_max": 0.0019331066941958852, + "clip_ratio/high_mean": 0.000874589912200463, + "clip_ratio/low_mean": 0.0009247772431990597, + "clip_ratio/low_min": 0.00011506138616823591, + "clip_ratio/region_mean": 0.0017993671935983002, + "epoch": 2.7370262390670552, + "grad_norm": 0.11880014836788177, + "learning_rate": 1e-06, + "loss": 0.0068, + "step": 271 + }, + { + "clip_ratio/high_max": 0.0023409619607264176, + "clip_ratio/high_mean": 0.0009553683612466557, + "clip_ratio/low_mean": 0.0010207238974544453, + "clip_ratio/low_min": 5.3695403039455414e-05, + "clip_ratio/region_mean": 0.001976092222321313, + "epoch": 2.746355685131195, + "grad_norm": 1.8470609188079834, + "learning_rate": 1e-06, + "loss": 0.0152, + "step": 272 + }, + { + "clip_ratio/high_max": 0.002387445463682525, + "clip_ratio/high_mean": 0.0009731576956255594, + "clip_ratio/low_mean": 0.0010243844062642893, + "clip_ratio/low_min": 8.229292325268034e-05, + "clip_ratio/region_mean": 0.0019975421455455944, + "epoch": 2.755685131195335, + "grad_norm": 0.11688001453876495, + "learning_rate": 1e-06, + "loss": 0.015, + "step": 273 + }, + { + "clip_ratio/high_max": 0.0024668668847880326, + "clip_ratio/high_mean": 0.0010067965176858706, + "clip_ratio/low_mean": 0.001008177121548215, + "clip_ratio/low_min": 0.00015318513851525495, + "clip_ratio/region_mean": 0.002014973622863181, + "epoch": 2.765014577259475, + "grad_norm": 0.12078815698623657, + "learning_rate": 1e-06, + "loss": 0.0151, + "step": 274 + }, + { + "clip_ratio/high_max": 0.002466472207743209, + "clip_ratio/high_mean": 0.0010551300292718224, + "clip_ratio/low_mean": 0.0010188171290792525, + "clip_ratio/low_min": 6.0372036386979744e-05, + "clip_ratio/region_mean": 0.002073947194730863, + "epoch": 2.774344023323615, + "grad_norm": 0.1232159286737442, + "learning_rate": 1e-06, + "loss": 0.0004, + "step": 275 + }, + { + "clip_ratio/high_max": 0.00244445198768517, + "clip_ratio/high_mean": 0.00110145202052081, + "clip_ratio/low_mean": 0.0009152304264716804, + "clip_ratio/low_min": 2.8493275749497116e-05, + "clip_ratio/region_mean": 0.0020166824615444057, + "epoch": 2.783673469387755, + "grad_norm": 0.12969809770584106, + "learning_rate": 1e-06, + "loss": -0.0251, + "step": 276 + }, + { + "clip_ratio/high_max": 0.00231015557073988, + "clip_ratio/high_mean": 0.0009708871039038058, + "clip_ratio/low_mean": 0.0010659317322279094, + "clip_ratio/low_min": 2.89754279947374e-05, + "clip_ratio/region_mean": 0.0020368187615531497, + "epoch": 2.793002915451895, + "grad_norm": 0.1141471341252327, + "learning_rate": 1e-06, + "loss": -0.0276, + "step": 277 + }, + { + "clip_ratio/high_max": 0.0024809273818391375, + "clip_ratio/high_mean": 0.0010630047945596743, + "clip_ratio/low_mean": 0.001058063775417395, + "clip_ratio/low_min": 8.341390639543533e-05, + "clip_ratio/region_mean": 0.002121068573615048, + "epoch": 2.8023323615160347, + "grad_norm": 0.12116222828626633, + "learning_rate": 1e-06, + "loss": -0.0181, + "step": 278 + }, + { + "clip_ratio/high_max": 0.002853109879652038, + "clip_ratio/high_mean": 0.001015771975289681, + "clip_ratio/low_mean": 0.001141551463661017, + "clip_ratio/low_min": 0.00010783191646623891, + "clip_ratio/region_mean": 0.002157323411665857, + "epoch": 2.811661807580175, + "grad_norm": 0.12891864776611328, + "learning_rate": 1e-06, + "loss": 0.0098, + "step": 279 + }, + { + "clip_ratio/high_max": 0.0024491413932992145, + "clip_ratio/high_mean": 0.0009487917632213794, + "clip_ratio/low_mean": 0.001186088913527783, + "clip_ratio/low_min": 0.00017845502134150593, + "clip_ratio/region_mean": 0.002134880742232781, + "epoch": 2.8209912536443147, + "grad_norm": 0.12250273674726486, + "learning_rate": 1e-06, + "loss": 0.0034, + "step": 280 + }, + { + "clip_ratio/high_max": 0.00224468921078369, + "clip_ratio/high_mean": 0.0009757881198311225, + "clip_ratio/low_mean": 0.0012388760733301751, + "clip_ratio/low_min": 0.0001514393552497495, + "clip_ratio/region_mean": 0.002214664244093001, + "epoch": 2.8303206997084547, + "grad_norm": 0.12632285058498383, + "learning_rate": 1e-06, + "loss": -0.0003, + "step": 281 + }, + { + "clip_ratio/high_max": 0.0022118565830169246, + "clip_ratio/high_mean": 0.0009181990608340129, + "clip_ratio/low_mean": 0.0011776583960454445, + "clip_ratio/low_min": 0.00015783298840688076, + "clip_ratio/region_mean": 0.0020958573804819025, + "epoch": 2.8396501457725947, + "grad_norm": 0.11565697938203812, + "learning_rate": 1e-06, + "loss": 0.0332, + "step": 282 + }, + { + "clip_ratio/high_max": 0.0024144740746123716, + "clip_ratio/high_mean": 0.0010288224366377108, + "clip_ratio/low_mean": 0.0012387945535010658, + "clip_ratio/low_min": 0.0001487179906689562, + "clip_ratio/region_mean": 0.0022676170265185647, + "epoch": 2.8489795918367347, + "grad_norm": 0.11546675115823746, + "learning_rate": 1e-06, + "loss": 0.0167, + "step": 283 + }, + { + "clip_ratio/high_max": 0.0025626233327784576, + "clip_ratio/high_mean": 0.0010337062776670791, + "clip_ratio/low_mean": 0.0012107248803658877, + "clip_ratio/low_min": 0.00011483960952318739, + "clip_ratio/region_mean": 0.002244431132567115, + "epoch": 2.8583090379008746, + "grad_norm": 0.12259412556886673, + "learning_rate": 1e-06, + "loss": 0.02, + "step": 284 + }, + { + "clip_ratio/high_max": 0.0023987821296032052, + "clip_ratio/high_mean": 0.0010659938106982736, + "clip_ratio/low_mean": 0.00112719381286297, + "clip_ratio/low_min": 5.6721270084381104e-05, + "clip_ratio/region_mean": 0.0021931875817244872, + "epoch": 2.8676384839650146, + "grad_norm": 0.14035850763320923, + "learning_rate": 1e-06, + "loss": -0.0136, + "step": 285 + }, + { + "clip_ratio/high_max": 0.002533859576942632, + "clip_ratio/high_mean": 0.0009809613566176267, + "clip_ratio/low_mean": 0.0012055512779625133, + "clip_ratio/low_min": 0.00013177785331208725, + "clip_ratio/region_mean": 0.002186512661864981, + "epoch": 2.8769679300291546, + "grad_norm": 0.1203429102897644, + "learning_rate": 1e-06, + "loss": 0.0101, + "step": 286 + }, + { + "clip_ratio/high_max": 0.0023078008016454987, + "clip_ratio/high_mean": 0.0009636809063522378, + "clip_ratio/low_mean": 0.0012416589415806811, + "clip_ratio/low_min": 0.00014422896219912218, + "clip_ratio/region_mean": 0.0022053398060961626, + "epoch": 2.8862973760932946, + "grad_norm": 0.12423661351203918, + "learning_rate": 1e-06, + "loss": 0.0359, + "step": 287 + }, + { + "clip_ratio/high_max": 0.0025863796399789862, + "clip_ratio/high_mean": 0.0010663768753147451, + "clip_ratio/low_mean": 0.0009776043771125842, + "clip_ratio/low_min": 1.75168170244433e-05, + "clip_ratio/region_mean": 0.0020439811996766366, + "epoch": 2.8956268221574346, + "grad_norm": 0.11673915386199951, + "learning_rate": 1e-06, + "loss": -0.021, + "step": 288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025913783482142905, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4047.0, + "completions/mean_length": 661.5585327148438, + "completions/mean_terminated_length": 570.1915283203125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 3.00932944606414, + "grad_norm": 0.12585154175758362, + "learning_rate": 1e-06, + "loss": -0.0473, + "num_tokens": 186191657.0, + "reward": 0.5730329751968384, + "reward_std": 0.19180303812026978, + "rewards/simpleverify_reward/mean": 0.5730329155921936, + "rewards/simpleverify_reward/std": 0.4946460723876953, + "step": 289 + }, + { + "clip_ratio/high_max": 0.002137782401405275, + "clip_ratio/high_mean": 0.0008485483122058213, + "clip_ratio/low_mean": 0.0006079304330341984, + "clip_ratio/low_min": 4.293019082979299e-05, + "clip_ratio/region_mean": 0.0014564787707058713, + "epoch": 3.01865889212828, + "grad_norm": 0.1152670606970787, + "learning_rate": 1e-06, + "loss": 0.0319, + "step": 290 + }, + { + "clip_ratio/high_max": 0.0022789640570408665, + "clip_ratio/high_mean": 0.0009204337584378663, + "clip_ratio/low_mean": 0.0006990835590841016, + "clip_ratio/low_min": 4.5441803194989916e-05, + "clip_ratio/region_mean": 0.0016195172938751057, + "epoch": 3.02798833819242, + "grad_norm": 0.1447540521621704, + "learning_rate": 1e-06, + "loss": 0.0363, + "step": 291 + }, + { + "clip_ratio/high_max": 0.0018935483803943498, + "clip_ratio/high_mean": 0.0008393884145334596, + "clip_ratio/low_mean": 0.0005619536314043216, + "clip_ratio/low_min": 1.2286220226087607e-05, + "clip_ratio/region_mean": 0.001401342022290919, + "epoch": 3.03731778425656, + "grad_norm": 0.1221446692943573, + "learning_rate": 1e-06, + "loss": -0.0261, + "step": 292 + }, + { + "clip_ratio/high_max": 0.0018411432683933526, + "clip_ratio/high_mean": 0.0008469480471831048, + "clip_ratio/low_mean": 0.0006023529585945653, + "clip_ratio/low_min": 2.819645669660531e-05, + "clip_ratio/region_mean": 0.0014493010166916065, + "epoch": 3.0466472303206995, + "grad_norm": 0.12960156798362732, + "learning_rate": 1e-06, + "loss": -0.0301, + "step": 293 + }, + { + "clip_ratio/high_max": 0.0019150481602991931, + "clip_ratio/high_mean": 0.0007621312506671529, + "clip_ratio/low_mean": 0.000683142608977505, + "clip_ratio/low_min": 6.466388458647998e-05, + "clip_ratio/region_mean": 0.0014452738832915202, + "epoch": 3.0559766763848395, + "grad_norm": 0.12239605188369751, + "learning_rate": 1e-06, + "loss": 0.0301, + "step": 294 + }, + { + "clip_ratio/high_max": 0.002183535609219689, + "clip_ratio/high_mean": 0.0009533421907690354, + "clip_ratio/low_mean": 0.0007072594580677105, + "clip_ratio/low_min": 4.035053825646173e-05, + "clip_ratio/region_mean": 0.0016606016215519048, + "epoch": 3.0653061224489795, + "grad_norm": 0.13197290897369385, + "learning_rate": 1e-06, + "loss": -0.0128, + "step": 295 + }, + { + "clip_ratio/high_max": 0.001975508115720004, + "clip_ratio/high_mean": 0.0008964744829427218, + "clip_ratio/low_mean": 0.0007166163568399497, + "clip_ratio/low_min": 5.755432994192233e-05, + "clip_ratio/region_mean": 0.001613090844330145, + "epoch": 3.0746355685131195, + "grad_norm": 0.1390487551689148, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 296 + }, + { + "clip_ratio/high_max": 0.0018032961306744255, + "clip_ratio/high_mean": 0.000826057068479713, + "clip_ratio/low_mean": 0.0006996557622187538, + "clip_ratio/low_min": 5.9933614465990104e-05, + "clip_ratio/region_mean": 0.0015257128106895834, + "epoch": 3.0839650145772595, + "grad_norm": 0.12430579960346222, + "learning_rate": 1e-06, + "loss": 0.0099, + "step": 297 + }, + { + "clip_ratio/high_max": 0.002024217421421781, + "clip_ratio/high_mean": 0.0008760691252973629, + "clip_ratio/low_mean": 0.0008207828032027464, + "clip_ratio/low_min": 9.32840766836307e-05, + "clip_ratio/region_mean": 0.00169685192668112, + "epoch": 3.0932944606413995, + "grad_norm": 0.12031778693199158, + "learning_rate": 1e-06, + "loss": 0.0288, + "step": 298 + }, + { + "clip_ratio/high_max": 0.0019508445147948805, + "clip_ratio/high_mean": 0.0008287017099064542, + "clip_ratio/low_mean": 0.0008302461119455984, + "clip_ratio/low_min": 0.00011111687399534276, + "clip_ratio/region_mean": 0.0016589478109381162, + "epoch": 3.1026239067055394, + "grad_norm": 0.12004540860652924, + "learning_rate": 1e-06, + "loss": 0.0319, + "step": 299 + }, + { + "clip_ratio/high_max": 0.0020195316392346285, + "clip_ratio/high_mean": 0.0008908537165552843, + "clip_ratio/low_mean": 0.0008761511599004734, + "clip_ratio/low_min": 7.549395741079934e-05, + "clip_ratio/region_mean": 0.0017670048837317154, + "epoch": 3.1119533527696794, + "grad_norm": 0.12134338170289993, + "learning_rate": 1e-06, + "loss": 0.0148, + "step": 300 + }, + { + "clip_ratio/high_max": 0.0022909149483894, + "clip_ratio/high_mean": 0.0010250048399029765, + "clip_ratio/low_mean": 0.0008124986998154782, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018375034997006878, + "epoch": 3.1212827988338194, + "grad_norm": 0.11783631891012192, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 301 + }, + { + "clip_ratio/high_max": 0.0020253909897292033, + "clip_ratio/high_mean": 0.0008927894159569405, + "clip_ratio/low_mean": 0.000938629043957917, + "clip_ratio/low_min": 0.0001440833921151352, + "clip_ratio/region_mean": 0.0018314184417249635, + "epoch": 3.130612244897959, + "grad_norm": 0.13193082809448242, + "learning_rate": 1e-06, + "loss": 0.03, + "step": 302 + }, + { + "clip_ratio/high_max": 0.0021273178717819974, + "clip_ratio/high_mean": 0.0009635814203647897, + "clip_ratio/low_mean": 0.000997285282210214, + "clip_ratio/low_min": 0.00011542265019670594, + "clip_ratio/region_mean": 0.0019608666843851097, + "epoch": 3.139941690962099, + "grad_norm": 0.11437549442052841, + "learning_rate": 1e-06, + "loss": 0.0228, + "step": 303 + }, + { + "clip_ratio/high_max": 0.0022696309024468064, + "clip_ratio/high_mean": 0.0008982518356788205, + "clip_ratio/low_mean": 0.0009465095863561146, + "clip_ratio/low_min": 8.449858705716906e-05, + "clip_ratio/region_mean": 0.001844761412939988, + "epoch": 3.149271137026239, + "grad_norm": 0.12392532080411911, + "learning_rate": 1e-06, + "loss": 0.0046, + "step": 304 + }, + { + "clip_ratio/high_max": 0.002378022516495548, + "clip_ratio/high_mean": 0.0011258928370807553, + "clip_ratio/low_mean": 0.0008753391516620468, + "clip_ratio/low_min": 4.3759836444223765e-05, + "clip_ratio/region_mean": 0.0020012320201203693, + "epoch": 3.158600583090379, + "grad_norm": 0.13241757452487946, + "learning_rate": 1e-06, + "loss": -0.0413, + "step": 305 + }, + { + "clip_ratio/high_max": 0.0021378254059527535, + "clip_ratio/high_mean": 0.0008881575868144864, + "clip_ratio/low_mean": 0.0010044291338999756, + "clip_ratio/low_min": 4.371010982140433e-05, + "clip_ratio/region_mean": 0.0018925867625512183, + "epoch": 3.167930029154519, + "grad_norm": 0.12934567034244537, + "learning_rate": 1e-06, + "loss": -0.0051, + "step": 306 + }, + { + "clip_ratio/high_max": 0.002058826637949096, + "clip_ratio/high_mean": 0.0009078570565179689, + "clip_ratio/low_mean": 0.001123340851336252, + "clip_ratio/low_min": 9.600896737538278e-05, + "clip_ratio/region_mean": 0.0020311979242251255, + "epoch": 3.177259475218659, + "grad_norm": 0.11368083208799362, + "learning_rate": 1e-06, + "loss": -0.0034, + "step": 307 + }, + { + "clip_ratio/high_max": 0.002240779111161828, + "clip_ratio/high_mean": 0.0008956037800089689, + "clip_ratio/low_mean": 0.0010629086536937393, + "clip_ratio/low_min": 4.8934634833130985e-05, + "clip_ratio/region_mean": 0.0019585124027798884, + "epoch": 3.186588921282799, + "grad_norm": 0.11548085510730743, + "learning_rate": 1e-06, + "loss": 0.0053, + "step": 308 + }, + { + "clip_ratio/high_max": 0.0019406232968322001, + "clip_ratio/high_mean": 0.0008760828750382643, + "clip_ratio/low_mean": 0.0012027405537082814, + "clip_ratio/low_min": 0.00011959243602177594, + "clip_ratio/region_mean": 0.0020788234687643126, + "epoch": 3.195918367346939, + "grad_norm": 0.12193051725625992, + "learning_rate": 1e-06, + "loss": 0.0304, + "step": 309 + }, + { + "clip_ratio/high_max": 0.0019967893094872124, + "clip_ratio/high_mean": 0.000845413793285843, + "clip_ratio/low_mean": 0.0011639131953415927, + "clip_ratio/low_min": 0.00018040690065390663, + "clip_ratio/region_mean": 0.002009326984989457, + "epoch": 3.205247813411079, + "grad_norm": 0.13364604115486145, + "learning_rate": 1e-06, + "loss": 0.0107, + "step": 310 + }, + { + "clip_ratio/high_max": 0.00245230059226742, + "clip_ratio/high_mean": 0.0010746660082077142, + "clip_ratio/low_mean": 0.0010397617770649958, + "clip_ratio/low_min": 4.257760610926198e-05, + "clip_ratio/region_mean": 0.0021144277998246253, + "epoch": 3.2145772594752184, + "grad_norm": 0.11715587973594666, + "learning_rate": 1e-06, + "loss": -0.0047, + "step": 311 + }, + { + "clip_ratio/high_max": 0.00253189260547515, + "clip_ratio/high_mean": 0.0010551489103818312, + "clip_ratio/low_mean": 0.001098928429200896, + "clip_ratio/low_min": 0.0001630687866054359, + "clip_ratio/region_mean": 0.002154077374143526, + "epoch": 3.2239067055393584, + "grad_norm": 0.12289279699325562, + "learning_rate": 1e-06, + "loss": -0.025, + "step": 312 + }, + { + "clip_ratio/high_max": 0.002433523208310362, + "clip_ratio/high_mean": 0.0009889512821246171, + "clip_ratio/low_mean": 0.0011194155713383225, + "clip_ratio/low_min": 7.554292278655339e-05, + "clip_ratio/region_mean": 0.0021083668834762648, + "epoch": 3.2332361516034984, + "grad_norm": 0.11821790039539337, + "learning_rate": 1e-06, + "loss": -0.0127, + "step": 313 + }, + { + "clip_ratio/high_max": 0.0023131492198444903, + "clip_ratio/high_mean": 0.0009166020208795089, + "clip_ratio/low_mean": 0.0010815071509568952, + "clip_ratio/low_min": 8.322376197611447e-05, + "clip_ratio/region_mean": 0.001998109211854171, + "epoch": 3.2425655976676384, + "grad_norm": 0.11479413509368896, + "learning_rate": 1e-06, + "loss": 0.0202, + "step": 314 + }, + { + "clip_ratio/high_max": 0.002730425709160045, + "clip_ratio/high_mean": 0.0011020803649444133, + "clip_ratio/low_mean": 0.001084739196812734, + "clip_ratio/low_min": 5.358674025046639e-05, + "clip_ratio/region_mean": 0.0021868195617571473, + "epoch": 3.2518950437317784, + "grad_norm": 0.12977640330791473, + "learning_rate": 1e-06, + "loss": -0.0089, + "step": 315 + }, + { + "clip_ratio/high_max": 0.0025748576008481905, + "clip_ratio/high_mean": 0.0009752476362336893, + "clip_ratio/low_mean": 0.0011016235148417763, + "clip_ratio/low_min": 6.810174363636179e-05, + "clip_ratio/region_mean": 0.002076871147437487, + "epoch": 3.2612244897959184, + "grad_norm": 0.12590360641479492, + "learning_rate": 1e-06, + "loss": -0.0117, + "step": 316 + }, + { + "clip_ratio/high_max": 0.0022781192892580293, + "clip_ratio/high_mean": 0.0010031983838416636, + "clip_ratio/low_mean": 0.0011138434056192636, + "clip_ratio/low_min": 4.892947072221432e-05, + "clip_ratio/region_mean": 0.0021170417821849696, + "epoch": 3.2705539358600584, + "grad_norm": 0.1301450878381729, + "learning_rate": 1e-06, + "loss": 0.0104, + "step": 317 + }, + { + "clip_ratio/high_max": 0.002370061061810702, + "clip_ratio/high_mean": 0.00111892673157854, + "clip_ratio/low_mean": 0.0008830380111248815, + "clip_ratio/low_min": 4.488076410780195e-05, + "clip_ratio/region_mean": 0.002001964741793927, + "epoch": 3.2798833819241984, + "grad_norm": 0.12157122045755386, + "learning_rate": 1e-06, + "loss": -0.0382, + "step": 318 + }, + { + "clip_ratio/high_max": 0.0024365238241443876, + "clip_ratio/high_mean": 0.0010116751200257568, + "clip_ratio/low_mean": 0.0009448562050238252, + "clip_ratio/low_min": 5.7639103033579886e-05, + "clip_ratio/region_mean": 0.00195653139235219, + "epoch": 3.2892128279883384, + "grad_norm": 0.1190371960401535, + "learning_rate": 1e-06, + "loss": -0.0196, + "step": 319 + }, + { + "clip_ratio/high_max": 0.0023169510532170534, + "clip_ratio/high_mean": 0.0010022043934441172, + "clip_ratio/low_mean": 0.0008849693440424744, + "clip_ratio/low_min": 6.96743718435755e-05, + "clip_ratio/region_mean": 0.0018871736974688247, + "epoch": 3.298542274052478, + "grad_norm": 0.11660901457071304, + "learning_rate": 1e-06, + "loss": -0.0, + "step": 320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.024762834821428603, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4043.0, + "completions/mean_length": 641.1109619140625, + "completions/mean_terminated_length": 553.3856811523438, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 3.307871720116618, + "grad_norm": 0.13143830001354218, + "learning_rate": 1e-06, + "loss": -0.0357, + "num_tokens": 204588540.0, + "reward": 0.5855538845062256, + "reward_std": 0.18952247500419617, + "rewards/simpleverify_reward/mean": 0.5855538249015808, + "rewards/simpleverify_reward/std": 0.49263474345207214, + "step": 321 + }, + { + "clip_ratio/high_max": 0.002081824990455061, + "clip_ratio/high_mean": 0.0008205740396078909, + "clip_ratio/low_mean": 0.0006175091480145056, + "clip_ratio/low_min": 9.681323626864469e-05, + "clip_ratio/region_mean": 0.0014380832144524902, + "epoch": 3.317201166180758, + "grad_norm": 0.12832291424274445, + "learning_rate": 1e-06, + "loss": 0.0216, + "step": 322 + }, + { + "clip_ratio/high_max": 0.002198897178459447, + "clip_ratio/high_mean": 0.0008805768675301806, + "clip_ratio/low_mean": 0.0005757844255640521, + "clip_ratio/low_min": 7.385367462120485e-05, + "clip_ratio/region_mean": 0.0014563612530764658, + "epoch": 3.326530612244898, + "grad_norm": 0.12534086406230927, + "learning_rate": 1e-06, + "loss": 0.018, + "step": 323 + }, + { + "clip_ratio/high_max": 0.0015997640330169816, + "clip_ratio/high_mean": 0.0006852754158899188, + "clip_ratio/low_mean": 0.0005495695841091219, + "clip_ratio/low_min": 4.164241727266926e-05, + "clip_ratio/region_mean": 0.0012348450109129772, + "epoch": 3.335860058309038, + "grad_norm": 0.12279549241065979, + "learning_rate": 1e-06, + "loss": -0.0018, + "step": 324 + }, + { + "clip_ratio/high_max": 0.001829999739129562, + "clip_ratio/high_mean": 0.0007402574010484386, + "clip_ratio/low_mean": 0.0007085395145622897, + "clip_ratio/low_min": 5.3915501666779164e-05, + "clip_ratio/region_mean": 0.0014487969128822442, + "epoch": 3.345189504373178, + "grad_norm": 0.13169774413108826, + "learning_rate": 1e-06, + "loss": 0.0094, + "step": 325 + }, + { + "clip_ratio/high_max": 0.002394345690845512, + "clip_ratio/high_mean": 0.000877591624885099, + "clip_ratio/low_mean": 0.0007403057497867849, + "clip_ratio/low_min": 6.64371509628836e-05, + "clip_ratio/region_mean": 0.001617897352844011, + "epoch": 3.354518950437318, + "grad_norm": 0.2514910399913788, + "learning_rate": 1e-06, + "loss": -0.0004, + "step": 326 + }, + { + "clip_ratio/high_max": 0.0021359779057092965, + "clip_ratio/high_mean": 0.000823063757707132, + "clip_ratio/low_mean": 0.000759678914619144, + "clip_ratio/low_min": 2.57315850831219e-05, + "clip_ratio/region_mean": 0.0015827426395844668, + "epoch": 3.363848396501458, + "grad_norm": 0.13429471850395203, + "learning_rate": 1e-06, + "loss": -0.0016, + "step": 327 + }, + { + "clip_ratio/high_max": 0.0020860579461441375, + "clip_ratio/high_mean": 0.0009367309721710626, + "clip_ratio/low_mean": 0.0006439800363295944, + "clip_ratio/low_min": 1.3525211215892341e-05, + "clip_ratio/region_mean": 0.0015807109957677312, + "epoch": 3.373177842565598, + "grad_norm": 0.13580140471458435, + "learning_rate": 1e-06, + "loss": -0.028, + "step": 328 + }, + { + "clip_ratio/high_max": 0.002187342368415557, + "clip_ratio/high_mean": 0.0009016236690513324, + "clip_ratio/low_mean": 0.0007877179668867029, + "clip_ratio/low_min": 2.0298797608120367e-05, + "clip_ratio/region_mean": 0.001689341625024099, + "epoch": 3.3825072886297374, + "grad_norm": 0.12560588121414185, + "learning_rate": 1e-06, + "loss": -0.0256, + "step": 329 + }, + { + "clip_ratio/high_max": 0.0021584258756774943, + "clip_ratio/high_mean": 0.0008691073508089175, + "clip_ratio/low_mean": 0.0008270080215879716, + "clip_ratio/low_min": 7.240215199999511e-05, + "clip_ratio/region_mean": 0.0016961153742158785, + "epoch": 3.3918367346938774, + "grad_norm": 0.12447452545166016, + "learning_rate": 1e-06, + "loss": 0.0266, + "step": 330 + }, + { + "clip_ratio/high_max": 0.0020215578115312383, + "clip_ratio/high_mean": 0.0008933574263210176, + "clip_ratio/low_mean": 0.0009193233445330407, + "clip_ratio/low_min": 0.00010749709053925471, + "clip_ratio/region_mean": 0.0018126807772205211, + "epoch": 3.4011661807580174, + "grad_norm": 0.12126223742961884, + "learning_rate": 1e-06, + "loss": -0.0146, + "step": 331 + }, + { + "clip_ratio/high_max": 0.00199752864864422, + "clip_ratio/high_mean": 0.0008398552072321763, + "clip_ratio/low_mean": 0.000931416725507006, + "clip_ratio/low_min": 5.21423698955914e-05, + "clip_ratio/region_mean": 0.001771271912730299, + "epoch": 3.4104956268221573, + "grad_norm": 0.12881004810333252, + "learning_rate": 1e-06, + "loss": 0.0245, + "step": 332 + }, + { + "clip_ratio/high_max": 0.0021911727671977133, + "clip_ratio/high_mean": 0.0009148283461399842, + "clip_ratio/low_mean": 0.0009713266081234906, + "clip_ratio/low_min": 4.489740058488678e-05, + "clip_ratio/region_mean": 0.0018861549397115596, + "epoch": 3.4198250728862973, + "grad_norm": 0.12141360342502594, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 333 + }, + { + "clip_ratio/high_max": 0.002078093330055708, + "clip_ratio/high_mean": 0.0009021739751915447, + "clip_ratio/low_mean": 0.0008745343566260999, + "clip_ratio/low_min": 1.9218941815779544e-05, + "clip_ratio/region_mean": 0.001776708279066952, + "epoch": 3.4291545189504373, + "grad_norm": 0.12522944808006287, + "learning_rate": 1e-06, + "loss": -0.0024, + "step": 334 + }, + { + "clip_ratio/high_max": 0.0020181925283395685, + "clip_ratio/high_mean": 0.0008280649199150503, + "clip_ratio/low_mean": 0.0008462995047011646, + "clip_ratio/low_min": 3.677912900457159e-05, + "clip_ratio/region_mean": 0.0016743644227972254, + "epoch": 3.4384839650145773, + "grad_norm": 0.11012354493141174, + "learning_rate": 1e-06, + "loss": 0.0217, + "step": 335 + }, + { + "clip_ratio/high_max": 0.0025067896640393883, + "clip_ratio/high_mean": 0.0010184146703977603, + "clip_ratio/low_mean": 0.000887044805494952, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019054595031775534, + "epoch": 3.4478134110787173, + "grad_norm": 0.1256856620311737, + "learning_rate": 1e-06, + "loss": -0.0193, + "step": 336 + }, + { + "clip_ratio/high_max": 0.002139975727914134, + "clip_ratio/high_mean": 0.000913791202037828, + "clip_ratio/low_mean": 0.0009576292795827612, + "clip_ratio/low_min": 5.092341234558262e-05, + "clip_ratio/region_mean": 0.0018714204561547376, + "epoch": 3.4571428571428573, + "grad_norm": 0.11477341502904892, + "learning_rate": 1e-06, + "loss": -0.028, + "step": 337 + }, + { + "clip_ratio/high_max": 0.0022000954413670115, + "clip_ratio/high_mean": 0.0008663976386742434, + "clip_ratio/low_mean": 0.0010844683711184189, + "clip_ratio/low_min": 0.00013487641263054684, + "clip_ratio/region_mean": 0.0019508659897837788, + "epoch": 3.466472303206997, + "grad_norm": 0.11449246108531952, + "learning_rate": 1e-06, + "loss": 0.0444, + "step": 338 + }, + { + "clip_ratio/high_max": 0.0023874429971328937, + "clip_ratio/high_mean": 0.0010054052509076428, + "clip_ratio/low_mean": 0.001076638613085379, + "clip_ratio/low_min": 6.585190112673445e-05, + "clip_ratio/region_mean": 0.002082043924019672, + "epoch": 3.4758017492711373, + "grad_norm": 0.12477981299161911, + "learning_rate": 1e-06, + "loss": -0.0163, + "step": 339 + }, + { + "clip_ratio/high_max": 0.0026525383509579115, + "clip_ratio/high_mean": 0.0010539999675529543, + "clip_ratio/low_mean": 0.0009186538081848994, + "clip_ratio/low_min": 7.1315276727546e-05, + "clip_ratio/region_mean": 0.0019726537939277478, + "epoch": 3.485131195335277, + "grad_norm": 0.12025745213031769, + "learning_rate": 1e-06, + "loss": -0.0332, + "step": 340 + }, + { + "clip_ratio/high_max": 0.0023425274484907277, + "clip_ratio/high_mean": 0.0009884111386782024, + "clip_ratio/low_mean": 0.0010804726916830987, + "clip_ratio/low_min": 0.00011691356212395476, + "clip_ratio/region_mean": 0.002068883790343534, + "epoch": 3.494460641399417, + "grad_norm": 0.12572072446346283, + "learning_rate": 1e-06, + "loss": 0.0239, + "step": 341 + }, + { + "clip_ratio/high_max": 0.0024941686606325675, + "clip_ratio/high_mean": 0.0009909349610097706, + "clip_ratio/low_mean": 0.0008940499683376402, + "clip_ratio/low_min": 2.4437509637209587e-05, + "clip_ratio/region_mean": 0.0018849849584512413, + "epoch": 3.503790087463557, + "grad_norm": 0.11796656250953674, + "learning_rate": 1e-06, + "loss": -0.0111, + "step": 342 + }, + { + "clip_ratio/high_max": 0.0023195863541332074, + "clip_ratio/high_mean": 0.000934428697291878, + "clip_ratio/low_mean": 0.0009639933014113922, + "clip_ratio/low_min": 2.5450475732213818e-05, + "clip_ratio/region_mean": 0.0018984220077982172, + "epoch": 3.513119533527697, + "grad_norm": 0.1288626492023468, + "learning_rate": 1e-06, + "loss": 0.0287, + "step": 343 + }, + { + "clip_ratio/high_max": 0.002286431972606806, + "clip_ratio/high_mean": 0.0008174578824764467, + "clip_ratio/low_mean": 0.0010058387506433064, + "clip_ratio/low_min": 0.0001309450399276102, + "clip_ratio/region_mean": 0.0018232966467621736, + "epoch": 3.522448979591837, + "grad_norm": 0.13154514133930206, + "learning_rate": 1e-06, + "loss": 0.0587, + "step": 344 + }, + { + "clip_ratio/high_max": 0.002750756233581342, + "clip_ratio/high_mean": 0.0010733838680607732, + "clip_ratio/low_mean": 0.0010943487650365569, + "clip_ratio/low_min": 0.00012019909354421543, + "clip_ratio/region_mean": 0.002167732636735309, + "epoch": 3.5317784256559768, + "grad_norm": 0.12297602742910385, + "learning_rate": 1e-06, + "loss": -0.01, + "step": 345 + }, + { + "clip_ratio/high_max": 0.0024775554265943356, + "clip_ratio/high_mean": 0.0010784051792143146, + "clip_ratio/low_mean": 0.0009597678636055207, + "clip_ratio/low_min": 2.7895559469470754e-05, + "clip_ratio/region_mean": 0.00203817302826792, + "epoch": 3.5411078717201168, + "grad_norm": 0.12645237147808075, + "learning_rate": 1e-06, + "loss": -0.0024, + "step": 346 + }, + { + "clip_ratio/high_max": 0.0021589628449874, + "clip_ratio/high_mean": 0.0009194501835736446, + "clip_ratio/low_mean": 0.0010389943017798942, + "clip_ratio/low_min": 5.7541479691280983e-05, + "clip_ratio/region_mean": 0.001958444496267475, + "epoch": 3.5504373177842563, + "grad_norm": 0.12677398324012756, + "learning_rate": 1e-06, + "loss": 0.0225, + "step": 347 + }, + { + "clip_ratio/high_max": 0.0023575429804623127, + "clip_ratio/high_mean": 0.000881888310686918, + "clip_ratio/low_mean": 0.001199412508867681, + "clip_ratio/low_min": 8.382833038922399e-05, + "clip_ratio/region_mean": 0.002081300764984917, + "epoch": 3.5597667638483967, + "grad_norm": 0.12611868977546692, + "learning_rate": 1e-06, + "loss": 0.0482, + "step": 348 + }, + { + "clip_ratio/high_max": 0.002348348185478244, + "clip_ratio/high_mean": 0.0009638598930905573, + "clip_ratio/low_mean": 0.0009683600837888662, + "clip_ratio/low_min": 7.389452457573498e-05, + "clip_ratio/region_mean": 0.0019322199950693175, + "epoch": 3.5690962099125363, + "grad_norm": 0.11160127073526382, + "learning_rate": 1e-06, + "loss": -0.0218, + "step": 349 + }, + { + "clip_ratio/high_max": 0.0027372134500183165, + "clip_ratio/high_mean": 0.0011217370083613787, + "clip_ratio/low_mean": 0.001023474003886804, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021452109576785006, + "epoch": 3.5784256559766763, + "grad_norm": 0.13964039087295532, + "learning_rate": 1e-06, + "loss": -0.0759, + "step": 350 + }, + { + "clip_ratio/high_max": 0.0023659457729081623, + "clip_ratio/high_mean": 0.001062861530954251, + "clip_ratio/low_mean": 0.0009581357580827898, + "clip_ratio/low_min": 5.1994421482959297e-05, + "clip_ratio/region_mean": 0.002020997279032599, + "epoch": 3.5877551020408163, + "grad_norm": 0.13347944617271423, + "learning_rate": 1e-06, + "loss": -0.0418, + "step": 351 + }, + { + "clip_ratio/high_max": 0.0024200145271606743, + "clip_ratio/high_mean": 0.0010322356520191533, + "clip_ratio/low_mean": 0.0011306301748845726, + "clip_ratio/low_min": 8.044620335567743e-05, + "clip_ratio/region_mean": 0.0021628658214467578, + "epoch": 3.5970845481049563, + "grad_norm": 0.12605777382850647, + "learning_rate": 1e-06, + "loss": 0.0252, + "step": 352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0276576450892857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4082.0, + "completions/mean_length": 654.5404663085938, + "completions/mean_terminated_length": 556.650390625, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "epoch": 3.6064139941690962, + "grad_norm": 0.13812322914600372, + "learning_rate": 1e-06, + "loss": 0.0277, + "num_tokens": 222893389.0, + "reward": 0.6016671657562256, + "reward_std": 0.1861223429441452, + "rewards/simpleverify_reward/mean": 0.6016671061515808, + "rewards/simpleverify_reward/std": 0.4895632266998291, + "step": 353 + }, + { + "clip_ratio/high_max": 0.0018422315115458332, + "clip_ratio/high_mean": 0.0008274892297777114, + "clip_ratio/low_mean": 0.0005405372648965567, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013680264855793212, + "epoch": 3.6157434402332362, + "grad_norm": 0.1303921788930893, + "learning_rate": 1e-06, + "loss": 0.0182, + "step": 354 + }, + { + "clip_ratio/high_max": 0.002004551875870675, + "clip_ratio/high_mean": 0.000853116056532599, + "clip_ratio/low_mean": 0.0005362414376577362, + "clip_ratio/low_min": 5.404989406088134e-05, + "clip_ratio/region_mean": 0.001389357497828314, + "epoch": 3.6250728862973762, + "grad_norm": 0.12853002548217773, + "learning_rate": 1e-06, + "loss": -0.0176, + "step": 355 + }, + { + "clip_ratio/high_max": 0.001959608773177024, + "clip_ratio/high_mean": 0.0008215340203605592, + "clip_ratio/low_mean": 0.0006006097537465394, + "clip_ratio/low_min": 1.2077294741175137e-05, + "clip_ratio/region_mean": 0.0014221437741070986, + "epoch": 3.6344023323615158, + "grad_norm": 0.124964639544487, + "learning_rate": 1e-06, + "loss": 0.0205, + "step": 356 + }, + { + "clip_ratio/high_max": 0.002032963962847134, + "clip_ratio/high_mean": 0.0007587872114527272, + "clip_ratio/low_mean": 0.0006263037448661635, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013850909745087847, + "epoch": 3.643731778425656, + "grad_norm": 0.12105780839920044, + "learning_rate": 1e-06, + "loss": -0.0116, + "step": 357 + }, + { + "clip_ratio/high_max": 0.00184320433618268, + "clip_ratio/high_mean": 0.000894339007572853, + "clip_ratio/low_mean": 0.0006461292405219865, + "clip_ratio/low_min": 5.668202902597841e-05, + "clip_ratio/region_mean": 0.0015404682562802918, + "epoch": 3.6530612244897958, + "grad_norm": 0.1163407415151596, + "learning_rate": 1e-06, + "loss": -0.0305, + "step": 358 + }, + { + "clip_ratio/high_max": 0.0020696637948276475, + "clip_ratio/high_mean": 0.0009150950991170248, + "clip_ratio/low_mean": 0.000654317571388674, + "clip_ratio/low_min": 8.144675121002365e-05, + "clip_ratio/region_mean": 0.0015694126777816564, + "epoch": 3.6623906705539357, + "grad_norm": 0.1236238107085228, + "learning_rate": 1e-06, + "loss": -0.0165, + "step": 359 + }, + { + "clip_ratio/high_max": 0.0020081195543752983, + "clip_ratio/high_mean": 0.0007979373476700857, + "clip_ratio/low_mean": 0.0007721411038801307, + "clip_ratio/low_min": 1.3796909115626477e-05, + "clip_ratio/region_mean": 0.001570078449731227, + "epoch": 3.6717201166180757, + "grad_norm": 0.13558252155780792, + "learning_rate": 1e-06, + "loss": 0.0429, + "step": 360 + }, + { + "clip_ratio/high_max": 0.0016852559238031972, + "clip_ratio/high_mean": 0.0007392789998448279, + "clip_ratio/low_mean": 0.0007591812009195564, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014984601948526688, + "epoch": 3.6810495626822157, + "grad_norm": 0.1261928379535675, + "learning_rate": 1e-06, + "loss": 0.012, + "step": 361 + }, + { + "clip_ratio/high_max": 0.0020385598800203297, + "clip_ratio/high_mean": 0.0009380048486491432, + "clip_ratio/low_mean": 0.0008080076513579115, + "clip_ratio/low_min": 3.826955253316555e-05, + "clip_ratio/region_mean": 0.0017460125018260442, + "epoch": 3.6903790087463557, + "grad_norm": 0.1256881207227707, + "learning_rate": 1e-06, + "loss": -0.0075, + "step": 362 + }, + { + "clip_ratio/high_max": 0.0019200395836378448, + "clip_ratio/high_mean": 0.0009273255309381057, + "clip_ratio/low_mean": 0.0007339983148995088, + "clip_ratio/low_min": 1.0578875844657887e-05, + "clip_ratio/region_mean": 0.0016613238731224556, + "epoch": 3.6997084548104957, + "grad_norm": 0.1309698224067688, + "learning_rate": 1e-06, + "loss": -0.014, + "step": 363 + }, + { + "clip_ratio/high_max": 0.002184991230024025, + "clip_ratio/high_mean": 0.0010025849041994661, + "clip_ratio/low_mean": 0.0007874047514633276, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001789989612007048, + "epoch": 3.7090379008746357, + "grad_norm": 0.12068425863981247, + "learning_rate": 1e-06, + "loss": 0.0022, + "step": 364 + }, + { + "clip_ratio/high_max": 0.002338721649721265, + "clip_ratio/high_mean": 0.0009570841084496351, + "clip_ratio/low_mean": 0.0008360726988030365, + "clip_ratio/low_min": 7.409069257846568e-05, + "clip_ratio/region_mean": 0.00179315678178682, + "epoch": 3.7183673469387752, + "grad_norm": 0.13551045954227448, + "learning_rate": 1e-06, + "loss": 0.019, + "step": 365 + }, + { + "clip_ratio/high_max": 0.002144247839169111, + "clip_ratio/high_mean": 0.0010146544918825384, + "clip_ratio/low_mean": 0.0008285586518468335, + "clip_ratio/low_min": 8.70294970809482e-05, + "clip_ratio/region_mean": 0.0018432131328154355, + "epoch": 3.7276967930029157, + "grad_norm": 0.13341477513313293, + "learning_rate": 1e-06, + "loss": -0.0283, + "step": 366 + }, + { + "clip_ratio/high_max": 0.0022179721199790947, + "clip_ratio/high_mean": 0.0009786770642676856, + "clip_ratio/low_mean": 0.0007432454131048871, + "clip_ratio/low_min": 8.223997792811133e-05, + "clip_ratio/region_mean": 0.0017219224901054986, + "epoch": 3.7370262390670552, + "grad_norm": 0.12940876185894012, + "learning_rate": 1e-06, + "loss": -0.0302, + "step": 367 + }, + { + "clip_ratio/high_max": 0.002196530156652443, + "clip_ratio/high_mean": 0.0010002526360040065, + "clip_ratio/low_mean": 0.0008059659430728061, + "clip_ratio/low_min": 3.0508474083035253e-05, + "clip_ratio/region_mean": 0.0018062185772578232, + "epoch": 3.746355685131195, + "grad_norm": 0.11200443655252457, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 368 + }, + { + "clip_ratio/high_max": 0.002153107438061852, + "clip_ratio/high_mean": 0.0009139909452642314, + "clip_ratio/low_mean": 0.0008407793829974253, + "clip_ratio/low_min": 6.661612587777199e-05, + "clip_ratio/region_mean": 0.001754770339175593, + "epoch": 3.755685131195335, + "grad_norm": 0.12188612669706345, + "learning_rate": 1e-06, + "loss": -0.0438, + "step": 369 + }, + { + "clip_ratio/high_max": 0.00217586851795204, + "clip_ratio/high_mean": 0.0009423261890333379, + "clip_ratio/low_mean": 0.000926724766031839, + "clip_ratio/low_min": 4.10402362831519e-05, + "clip_ratio/region_mean": 0.0018690509314183146, + "epoch": 3.765014577259475, + "grad_norm": 0.11991658806800842, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 370 + }, + { + "clip_ratio/high_max": 0.002388638698903378, + "clip_ratio/high_mean": 0.001035690598655492, + "clip_ratio/low_mean": 0.0008325787930516526, + "clip_ratio/low_min": 2.7808289814856835e-05, + "clip_ratio/region_mean": 0.0018682693917071447, + "epoch": 3.774344023323615, + "grad_norm": 0.13602958619594574, + "learning_rate": 1e-06, + "loss": -0.0342, + "step": 371 + }, + { + "clip_ratio/high_max": 0.00253827672713669, + "clip_ratio/high_mean": 0.0009370409970870242, + "clip_ratio/low_mean": 0.0008679752863827161, + "clip_ratio/low_min": 1.0249262231809553e-05, + "clip_ratio/region_mean": 0.001805016290745698, + "epoch": 3.783673469387755, + "grad_norm": 0.12156805396080017, + "learning_rate": 1e-06, + "loss": 0.0102, + "step": 372 + }, + { + "clip_ratio/high_max": 0.002167011196434032, + "clip_ratio/high_mean": 0.0008962552565208171, + "clip_ratio/low_mean": 0.0009696360484667821, + "clip_ratio/low_min": 5.224735650699586e-05, + "clip_ratio/region_mean": 0.0018658912958926521, + "epoch": 3.793002915451895, + "grad_norm": 0.1260608732700348, + "learning_rate": 1e-06, + "loss": 0.0242, + "step": 373 + }, + { + "clip_ratio/high_max": 0.002384423372859601, + "clip_ratio/high_mean": 0.0010103317763423547, + "clip_ratio/low_mean": 0.0009639660765969893, + "clip_ratio/low_min": 2.18150089494884e-05, + "clip_ratio/region_mean": 0.0019742978547583334, + "epoch": 3.8023323615160347, + "grad_norm": 0.12216995656490326, + "learning_rate": 1e-06, + "loss": -0.0221, + "step": 374 + }, + { + "clip_ratio/high_max": 0.0023978524113772437, + "clip_ratio/high_mean": 0.0010300275062036235, + "clip_ratio/low_mean": 0.001245647443283815, + "clip_ratio/low_min": 0.00014081780500418972, + "clip_ratio/region_mean": 0.002275674960401375, + "epoch": 3.811661807580175, + "grad_norm": 0.12235193699598312, + "learning_rate": 1e-06, + "loss": 0.0046, + "step": 375 + }, + { + "clip_ratio/high_max": 0.002689565109903924, + "clip_ratio/high_mean": 0.0010174690614803694, + "clip_ratio/low_mean": 0.000994558442471316, + "clip_ratio/low_min": 2.5107806322921533e-05, + "clip_ratio/region_mean": 0.002012027493037749, + "epoch": 3.8209912536443147, + "grad_norm": 0.13323216140270233, + "learning_rate": 1e-06, + "loss": 0.0106, + "step": 376 + }, + { + "clip_ratio/high_max": 0.002252488280646503, + "clip_ratio/high_mean": 0.0009471086705161724, + "clip_ratio/low_mean": 0.0010546732955845073, + "clip_ratio/low_min": 9.687905276223319e-05, + "clip_ratio/region_mean": 0.002001781962462701, + "epoch": 3.8303206997084547, + "grad_norm": 0.13442502915859222, + "learning_rate": 1e-06, + "loss": -0.0271, + "step": 377 + }, + { + "clip_ratio/high_max": 0.0022082783034420572, + "clip_ratio/high_mean": 0.0009712189985293662, + "clip_ratio/low_mean": 0.0009713226254461915, + "clip_ratio/low_min": 1.72604250110453e-05, + "clip_ratio/region_mean": 0.0019425416066951584, + "epoch": 3.8396501457725947, + "grad_norm": 0.11734303086996078, + "learning_rate": 1e-06, + "loss": 0.0118, + "step": 378 + }, + { + "clip_ratio/high_max": 0.0023211157858895604, + "clip_ratio/high_mean": 0.000983304671535734, + "clip_ratio/low_mean": 0.0010854787615244277, + "clip_ratio/low_min": 4.8139254431589507e-05, + "clip_ratio/region_mean": 0.0020687834330601618, + "epoch": 3.8489795918367347, + "grad_norm": 0.1344214528799057, + "learning_rate": 1e-06, + "loss": -0.0048, + "step": 379 + }, + { + "clip_ratio/high_max": 0.002492693529347889, + "clip_ratio/high_mean": 0.0009986599870899227, + "clip_ratio/low_mean": 0.0012119804705434944, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002210640443081502, + "epoch": 3.8583090379008746, + "grad_norm": 0.13017284870147705, + "learning_rate": 1e-06, + "loss": 0.0267, + "step": 380 + }, + { + "clip_ratio/high_max": 0.0023947299996507354, + "clip_ratio/high_mean": 0.0009566030093992595, + "clip_ratio/low_mean": 0.0012776065541402204, + "clip_ratio/low_min": 0.00023669379515922628, + "clip_ratio/region_mean": 0.0022342095981002785, + "epoch": 3.8676384839650146, + "grad_norm": 0.1225247010588646, + "learning_rate": 1e-06, + "loss": 0.0279, + "step": 381 + }, + { + "clip_ratio/high_max": 0.002490529881470138, + "clip_ratio/high_mean": 0.0009996802818932338, + "clip_ratio/low_mean": 0.001070929425623035, + "clip_ratio/low_min": 0.00013358558408071985, + "clip_ratio/region_mean": 0.002070609712973237, + "epoch": 3.8769679300291546, + "grad_norm": 0.13048617541790009, + "learning_rate": 1e-06, + "loss": -0.014, + "step": 382 + }, + { + "clip_ratio/high_max": 0.0021763901750091463, + "clip_ratio/high_mean": 0.0009103734992095269, + "clip_ratio/low_mean": 0.0008946762300183764, + "clip_ratio/low_min": 3.4789731216733344e-05, + "clip_ratio/region_mean": 0.0018050497528747655, + "epoch": 3.8862973760932946, + "grad_norm": 0.11429255455732346, + "learning_rate": 1e-06, + "loss": -0.0015, + "step": 383 + }, + { + "clip_ratio/high_max": 0.0023308118325076066, + "clip_ratio/high_mean": 0.0008925222246034537, + "clip_ratio/low_mean": 0.0011587132430577185, + "clip_ratio/low_min": 0.00010190919147134991, + "clip_ratio/region_mean": 0.002051235467661172, + "epoch": 3.8956268221574346, + "grad_norm": 0.11595192551612854, + "learning_rate": 1e-06, + "loss": 0.0287, + "step": 384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.027692522321428603, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4032.0, + "completions/mean_length": 663.2836303710938, + "completions/mean_terminated_length": 565.5156860351562, + "completions/min_length": 92.0, + "completions/min_terminated_length": 92.0, + "epoch": 4.0093294460641395, + "grad_norm": 0.1376991719007492, + "learning_rate": 1e-06, + "loss": 0.0001, + "num_tokens": 241514562.0, + "reward": 0.5964006781578064, + "reward_std": 0.1858140379190445, + "rewards/simpleverify_reward/mean": 0.5964006781578064, + "rewards/simpleverify_reward/std": 0.4906274676322937, + "step": 385 + }, + { + "clip_ratio/high_max": 0.0021251889993436635, + "clip_ratio/high_mean": 0.0008307242787850555, + "clip_ratio/low_mean": 0.000551025665117777, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013817499675496947, + "epoch": 4.01865889212828, + "grad_norm": 0.16409343481063843, + "learning_rate": 1e-06, + "loss": -0.0385, + "step": 386 + }, + { + "clip_ratio/high_max": 0.0018537556170485914, + "clip_ratio/high_mean": 0.0007750992826913716, + "clip_ratio/low_mean": 0.0004142519519518828, + "clip_ratio/low_min": 1.605445686436724e-05, + "clip_ratio/region_mean": 0.0011893512000824558, + "epoch": 4.0279883381924195, + "grad_norm": 0.126278355717659, + "learning_rate": 1e-06, + "loss": -0.0305, + "step": 387 + }, + { + "clip_ratio/high_max": 0.0021483809978235513, + "clip_ratio/high_mean": 0.0008904324367904337, + "clip_ratio/low_mean": 0.0005418080754679977, + "clip_ratio/low_min": 2.6358845389040653e-05, + "clip_ratio/region_mean": 0.0014322405186248943, + "epoch": 4.03731778425656, + "grad_norm": 0.13643008470535278, + "learning_rate": 1e-06, + "loss": -0.0375, + "step": 388 + }, + { + "clip_ratio/high_max": 0.001825592560635414, + "clip_ratio/high_mean": 0.0008252827028627507, + "clip_ratio/low_mean": 0.0005161194167158101, + "clip_ratio/low_min": 4.147367417317582e-05, + "clip_ratio/region_mean": 0.0013414021450444125, + "epoch": 4.0466472303206995, + "grad_norm": 0.11691092699766159, + "learning_rate": 1e-06, + "loss": -0.0049, + "step": 389 + }, + { + "clip_ratio/high_max": 0.0018002502911258489, + "clip_ratio/high_mean": 0.0007384081254713237, + "clip_ratio/low_mean": 0.0006537441067848704, + "clip_ratio/low_min": 6.329604002530687e-05, + "clip_ratio/region_mean": 0.0013921522513555828, + "epoch": 4.05597667638484, + "grad_norm": 0.11648301035165787, + "learning_rate": 1e-06, + "loss": 0.038, + "step": 390 + }, + { + "clip_ratio/high_max": 0.0021319622101145796, + "clip_ratio/high_mean": 0.0008681447943672538, + "clip_ratio/low_mean": 0.0006683531482849503, + "clip_ratio/low_min": 1.7356289390590973e-05, + "clip_ratio/region_mean": 0.001536497933557257, + "epoch": 4.0653061224489795, + "grad_norm": 0.12540303170681, + "learning_rate": 1e-06, + "loss": 0.0174, + "step": 391 + }, + { + "clip_ratio/high_max": 0.001953968392626848, + "clip_ratio/high_mean": 0.0007701072099735029, + "clip_ratio/low_mean": 0.0006839515426690923, + "clip_ratio/low_min": 3.984321574534988e-05, + "clip_ratio/region_mean": 0.001454058728995733, + "epoch": 4.07463556851312, + "grad_norm": 0.11700475215911865, + "learning_rate": 1e-06, + "loss": -0.0003, + "step": 392 + }, + { + "clip_ratio/high_max": 0.0021675254974979907, + "clip_ratio/high_mean": 0.0009661978147050831, + "clip_ratio/low_mean": 0.0007228480244521052, + "clip_ratio/low_min": 2.3316544684348628e-05, + "clip_ratio/region_mean": 0.0016890458100533579, + "epoch": 4.0839650145772595, + "grad_norm": 0.12342169880867004, + "learning_rate": 1e-06, + "loss": 0.0181, + "step": 393 + }, + { + "clip_ratio/high_max": 0.0021691713627660647, + "clip_ratio/high_mean": 0.0008687900899531087, + "clip_ratio/low_mean": 0.0008125464610202471, + "clip_ratio/low_min": 5.02940674778074e-05, + "clip_ratio/region_mean": 0.001681336565525271, + "epoch": 4.093294460641399, + "grad_norm": 0.121994249522686, + "learning_rate": 1e-06, + "loss": 0.0254, + "step": 394 + }, + { + "clip_ratio/high_max": 0.001994244616071228, + "clip_ratio/high_mean": 0.000891088569915155, + "clip_ratio/low_mean": 0.0007734938753856113, + "clip_ratio/low_min": 9.669539304013597e-05, + "clip_ratio/region_mean": 0.0016645824434817769, + "epoch": 4.1026239067055394, + "grad_norm": 0.1204499751329422, + "learning_rate": 1e-06, + "loss": -0.0036, + "step": 395 + }, + { + "clip_ratio/high_max": 0.0020316431546234526, + "clip_ratio/high_mean": 0.0008603325077274349, + "clip_ratio/low_mean": 0.0007531925039074849, + "clip_ratio/low_min": 6.459650103352033e-05, + "clip_ratio/region_mean": 0.001613525007996941, + "epoch": 4.111953352769679, + "grad_norm": 0.1130305752158165, + "learning_rate": 1e-06, + "loss": -0.0257, + "step": 396 + }, + { + "clip_ratio/high_max": 0.002003321271331515, + "clip_ratio/high_mean": 0.0008780704683886142, + "clip_ratio/low_mean": 0.0008695145625097211, + "clip_ratio/low_min": 4.928806265525054e-05, + "clip_ratio/region_mean": 0.0017475850399932824, + "epoch": 4.121282798833819, + "grad_norm": 0.13229753077030182, + "learning_rate": 1e-06, + "loss": 0.022, + "step": 397 + }, + { + "clip_ratio/high_max": 0.001952392078237608, + "clip_ratio/high_mean": 0.0008767041690589394, + "clip_ratio/low_mean": 0.0009058000978257041, + "clip_ratio/low_min": 2.510012564016506e-05, + "clip_ratio/region_mean": 0.0017825042305048555, + "epoch": 4.130612244897959, + "grad_norm": 0.13723908364772797, + "learning_rate": 1e-06, + "loss": -0.021, + "step": 398 + }, + { + "clip_ratio/high_max": 0.0022944982047192752, + "clip_ratio/high_mean": 0.0008759930078667821, + "clip_ratio/low_mean": 0.0008892982114048209, + "clip_ratio/low_min": 4.741707016364671e-05, + "clip_ratio/region_mean": 0.0017652911992627196, + "epoch": 4.139941690962099, + "grad_norm": 0.1374003142118454, + "learning_rate": 1e-06, + "loss": -0.0061, + "step": 399 + }, + { + "clip_ratio/high_max": 0.0021894147284911014, + "clip_ratio/high_mean": 0.0009306346437369939, + "clip_ratio/low_mean": 0.0010372229226049967, + "clip_ratio/low_min": 9.53391172515694e-05, + "clip_ratio/region_mean": 0.0019678575481520966, + "epoch": 4.149271137026239, + "grad_norm": 0.1257951706647873, + "learning_rate": 1e-06, + "loss": 0.0418, + "step": 400 + }, + { + "clip_ratio/high_max": 0.002013164856180083, + "clip_ratio/high_mean": 0.0008204853465940687, + "clip_ratio/low_mean": 0.0009291273418057244, + "clip_ratio/low_min": 6.969223613850772e-05, + "clip_ratio/region_mean": 0.0017496126747573726, + "epoch": 4.158600583090379, + "grad_norm": 0.1225176602602005, + "learning_rate": 1e-06, + "loss": 0.0114, + "step": 401 + }, + { + "clip_ratio/high_max": 0.001962927562999539, + "clip_ratio/high_mean": 0.0008756719180382788, + "clip_ratio/low_mean": 0.001089063793187961, + "clip_ratio/low_min": 0.00016013995991670527, + "clip_ratio/region_mean": 0.0019647356966743246, + "epoch": 4.167930029154519, + "grad_norm": 0.13065384328365326, + "learning_rate": 1e-06, + "loss": 0.0411, + "step": 402 + }, + { + "clip_ratio/high_max": 0.00213753947900841, + "clip_ratio/high_mean": 0.0009142789604084101, + "clip_ratio/low_mean": 0.0009486467824899592, + "clip_ratio/low_min": 0.00010948617818939965, + "clip_ratio/region_mean": 0.00186292571015656, + "epoch": 4.1772594752186585, + "grad_norm": 0.12291523069143295, + "learning_rate": 1e-06, + "loss": 0.0106, + "step": 403 + }, + { + "clip_ratio/high_max": 0.0022747897091903724, + "clip_ratio/high_mean": 0.0010808745573740453, + "clip_ratio/low_mean": 0.0008359467228729045, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019168212747899815, + "epoch": 4.186588921282799, + "grad_norm": 0.11953331530094147, + "learning_rate": 1e-06, + "loss": -0.0291, + "step": 404 + }, + { + "clip_ratio/high_max": 0.0023405523170367815, + "clip_ratio/high_mean": 0.0010126458400918636, + "clip_ratio/low_mean": 0.0008475766117044259, + "clip_ratio/low_min": 7.245269898703555e-05, + "clip_ratio/region_mean": 0.00186022248090012, + "epoch": 4.1959183673469385, + "grad_norm": 0.1285480558872223, + "learning_rate": 1e-06, + "loss": -0.0412, + "step": 405 + }, + { + "clip_ratio/high_max": 0.002241056543425657, + "clip_ratio/high_mean": 0.0008542670420865761, + "clip_ratio/low_mean": 0.0009642466357036028, + "clip_ratio/low_min": 4.7311974412878044e-05, + "clip_ratio/region_mean": 0.0018185136505053379, + "epoch": 4.205247813411079, + "grad_norm": 0.12093605846166611, + "learning_rate": 1e-06, + "loss": 0.0293, + "step": 406 + }, + { + "clip_ratio/high_max": 0.0026399049020255916, + "clip_ratio/high_mean": 0.001024092507577734, + "clip_ratio/low_mean": 0.0010683339824026916, + "clip_ratio/low_min": 5.9075033277622424e-05, + "clip_ratio/region_mean": 0.0020924264899804257, + "epoch": 4.214577259475218, + "grad_norm": 0.13123777508735657, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 407 + }, + { + "clip_ratio/high_max": 0.0019685833176481538, + "clip_ratio/high_mean": 0.0008986030152300373, + "clip_ratio/low_mean": 0.0010061929096991662, + "clip_ratio/low_min": 2.45643268499407e-05, + "clip_ratio/region_mean": 0.001904795914015267, + "epoch": 4.223906705539359, + "grad_norm": 0.10999801754951477, + "learning_rate": 1e-06, + "loss": -0.0016, + "step": 408 + }, + { + "clip_ratio/high_max": 0.002010038551816251, + "clip_ratio/high_mean": 0.0008622361056040972, + "clip_ratio/low_mean": 0.0010035943432740169, + "clip_ratio/low_min": 1.2320126188569702e-05, + "clip_ratio/region_mean": 0.0018658304761629552, + "epoch": 4.233236151603498, + "grad_norm": 0.13467088341712952, + "learning_rate": 1e-06, + "loss": -0.0136, + "step": 409 + }, + { + "clip_ratio/high_max": 0.002251534075185191, + "clip_ratio/high_mean": 0.0010714400887081865, + "clip_ratio/low_mean": 0.0010257547110086307, + "clip_ratio/low_min": 5.750133004767122e-05, + "clip_ratio/region_mean": 0.002097194803354796, + "epoch": 4.242565597667639, + "grad_norm": 0.12701420485973358, + "learning_rate": 1e-06, + "loss": -0.0327, + "step": 410 + }, + { + "clip_ratio/high_max": 0.002209433303505648, + "clip_ratio/high_mean": 0.0008623511330370093, + "clip_ratio/low_mean": 0.0009772533121576998, + "clip_ratio/low_min": 2.49351687671151e-05, + "clip_ratio/region_mean": 0.0018396044324617833, + "epoch": 4.251895043731778, + "grad_norm": 0.12217708677053452, + "learning_rate": 1e-06, + "loss": 0.0025, + "step": 411 + }, + { + "clip_ratio/high_max": 0.002287858478666749, + "clip_ratio/high_mean": 0.0009879158860712778, + "clip_ratio/low_mean": 0.001147979961388046, + "clip_ratio/low_min": 0.00010002726048696786, + "clip_ratio/region_mean": 0.002135895876563154, + "epoch": 4.261224489795918, + "grad_norm": 0.13534663617610931, + "learning_rate": 1e-06, + "loss": -0.031, + "step": 412 + }, + { + "clip_ratio/high_max": 0.0024083175958367065, + "clip_ratio/high_mean": 0.0010091956610267516, + "clip_ratio/low_mean": 0.00103961595959845, + "clip_ratio/low_min": 5.982574657537043e-05, + "clip_ratio/region_mean": 0.0020488116642809473, + "epoch": 4.270553935860058, + "grad_norm": 0.12116041034460068, + "learning_rate": 1e-06, + "loss": -0.0161, + "step": 413 + }, + { + "clip_ratio/high_max": 0.002476684378052596, + "clip_ratio/high_mean": 0.001020087773213163, + "clip_ratio/low_mean": 0.0011294773466943298, + "clip_ratio/low_min": 0.0001158672375822789, + "clip_ratio/region_mean": 0.0021495650726137683, + "epoch": 4.279883381924198, + "grad_norm": 0.13888022303581238, + "learning_rate": 1e-06, + "loss": 0.0413, + "step": 414 + }, + { + "clip_ratio/high_max": 0.0025053853896679357, + "clip_ratio/high_mean": 0.001063837975380011, + "clip_ratio/low_mean": 0.001142918521509273, + "clip_ratio/low_min": 6.753106572432443e-05, + "clip_ratio/region_mean": 0.002206756515079178, + "epoch": 4.289212827988338, + "grad_norm": 0.12599118053913116, + "learning_rate": 1e-06, + "loss": 0.0153, + "step": 415 + }, + { + "clip_ratio/high_max": 0.002442953544232296, + "clip_ratio/high_mean": 0.0008535168371963664, + "clip_ratio/low_mean": 0.0011593517228902783, + "clip_ratio/low_min": 0.00012190126744826557, + "clip_ratio/region_mean": 0.0020128685573581606, + "epoch": 4.298542274052478, + "grad_norm": 0.12028119713068008, + "learning_rate": 1e-06, + "loss": 0.0342, + "step": 416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0281459263392857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4047.0, + "completions/mean_length": 648.642578125, + "completions/mean_terminated_length": 548.803466796875, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 4.307871720116618, + "grad_norm": 0.14324797689914703, + "learning_rate": 1e-06, + "loss": 0.0201, + "num_tokens": 259663763.0, + "reward": 0.6117815375328064, + "reward_std": 0.18697793781757355, + "rewards/simpleverify_reward/mean": 0.6117815375328064, + "rewards/simpleverify_reward/std": 0.48735329508781433, + "step": 417 + }, + { + "clip_ratio/high_max": 0.0025448784217587672, + "clip_ratio/high_mean": 0.0009797757647902472, + "clip_ratio/low_mean": 0.0005918473639212607, + "clip_ratio/low_min": 3.9396413740178104e-05, + "clip_ratio/region_mean": 0.001571623120980803, + "epoch": 4.317201166180758, + "grad_norm": 0.15288770198822021, + "learning_rate": 1e-06, + "loss": 0.0297, + "step": 418 + }, + { + "clip_ratio/high_max": 0.0019862193912558723, + "clip_ratio/high_mean": 0.0008217500235332409, + "clip_ratio/low_mean": 0.0005634796780213946, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013852296797267627, + "epoch": 4.326530612244898, + "grad_norm": 0.11524496227502823, + "learning_rate": 1e-06, + "loss": 0.0023, + "step": 419 + }, + { + "clip_ratio/high_max": 0.0025638552178861573, + "clip_ratio/high_mean": 0.0009641898523113923, + "clip_ratio/low_mean": 0.000555866122340376, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00152005601557903, + "epoch": 4.335860058309038, + "grad_norm": 0.2770618498325348, + "learning_rate": 1e-06, + "loss": -0.0141, + "step": 420 + }, + { + "clip_ratio/high_max": 0.0019508068799041212, + "clip_ratio/high_mean": 0.0007618555764565826, + "clip_ratio/low_mean": 0.0005801574561701273, + "clip_ratio/low_min": 1.6622339899186045e-05, + "clip_ratio/region_mean": 0.001342013041721657, + "epoch": 4.345189504373177, + "grad_norm": 0.12275660783052444, + "learning_rate": 1e-06, + "loss": -0.0113, + "step": 421 + }, + { + "clip_ratio/high_max": 0.0023303804409806617, + "clip_ratio/high_mean": 0.0009664339995651972, + "clip_ratio/low_mean": 0.0005654622100337292, + "clip_ratio/low_min": 4.6924190428399015e-05, + "clip_ratio/region_mean": 0.0015318961814045906, + "epoch": 4.354518950437318, + "grad_norm": 0.11312326043844223, + "learning_rate": 1e-06, + "loss": -0.0149, + "step": 422 + }, + { + "clip_ratio/high_max": 0.001929619895236101, + "clip_ratio/high_mean": 0.0007667033469260787, + "clip_ratio/low_mean": 0.0006466154791269219, + "clip_ratio/low_min": 1.1927480954909697e-05, + "clip_ratio/region_mean": 0.0014133188560663257, + "epoch": 4.363848396501457, + "grad_norm": 0.11987412720918655, + "learning_rate": 1e-06, + "loss": -0.0113, + "step": 423 + }, + { + "clip_ratio/high_max": 0.001966964566236129, + "clip_ratio/high_mean": 0.0008856988042680314, + "clip_ratio/low_mean": 0.0007417448632622836, + "clip_ratio/low_min": 3.838701741187833e-05, + "clip_ratio/region_mean": 0.0016274436893581878, + "epoch": 4.373177842565598, + "grad_norm": 0.14740286767482758, + "learning_rate": 1e-06, + "loss": -0.016, + "step": 424 + }, + { + "clip_ratio/high_max": 0.00222642799053574, + "clip_ratio/high_mean": 0.0009224833866028348, + "clip_ratio/low_mean": 0.0006897337052578223, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016122171000461094, + "epoch": 4.382507288629737, + "grad_norm": 0.12216822803020477, + "learning_rate": 1e-06, + "loss": -0.0019, + "step": 425 + }, + { + "clip_ratio/high_max": 0.002756641391897574, + "clip_ratio/high_mean": 0.001089139046598575, + "clip_ratio/low_mean": 0.0007599700657010544, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018491091395844705, + "epoch": 4.391836734693878, + "grad_norm": 0.12467877566814423, + "learning_rate": 1e-06, + "loss": -0.0565, + "step": 426 + }, + { + "clip_ratio/high_max": 0.0023627905029570684, + "clip_ratio/high_mean": 0.000987551855359925, + "clip_ratio/low_mean": 0.0008300238441734109, + "clip_ratio/low_min": 6.853339073131792e-05, + "clip_ratio/region_mean": 0.0018175756849814206, + "epoch": 4.401166180758017, + "grad_norm": 0.13413630425930023, + "learning_rate": 1e-06, + "loss": -0.0134, + "step": 427 + }, + { + "clip_ratio/high_max": 0.002305820496985689, + "clip_ratio/high_mean": 0.0009572233175276779, + "clip_ratio/low_mean": 0.0007406642334899516, + "clip_ratio/low_min": 5.1544185225793626e-05, + "clip_ratio/region_mean": 0.00169788756466005, + "epoch": 4.410495626822158, + "grad_norm": 0.12544167041778564, + "learning_rate": 1e-06, + "loss": 0.0108, + "step": 428 + }, + { + "clip_ratio/high_max": 0.0024296663541463204, + "clip_ratio/high_mean": 0.0008782034237810876, + "clip_ratio/low_mean": 0.0007698386643824051, + "clip_ratio/low_min": 2.9733916562690865e-05, + "clip_ratio/region_mean": 0.0016480420745210722, + "epoch": 4.419825072886297, + "grad_norm": 0.11271168291568756, + "learning_rate": 1e-06, + "loss": -0.0123, + "step": 429 + }, + { + "clip_ratio/high_max": 0.001878272378235124, + "clip_ratio/high_mean": 0.0007897759987827158, + "clip_ratio/low_mean": 0.0008921427961467998, + "clip_ratio/low_min": 3.39891357725719e-05, + "clip_ratio/region_mean": 0.0016819188313093036, + "epoch": 4.429154518950437, + "grad_norm": 0.14495782554149628, + "learning_rate": 1e-06, + "loss": 0.0303, + "step": 430 + }, + { + "clip_ratio/high_max": 0.0023858800195739605, + "clip_ratio/high_mean": 0.001056421398970997, + "clip_ratio/low_mean": 0.0011328381842758972, + "clip_ratio/low_min": 7.150277724576881e-05, + "clip_ratio/region_mean": 0.0021892595977988094, + "epoch": 4.438483965014577, + "grad_norm": 0.18605196475982666, + "learning_rate": 1e-06, + "loss": 0.0194, + "step": 431 + }, + { + "clip_ratio/high_max": 0.0026346297236159444, + "clip_ratio/high_mean": 0.0010570780796115287, + "clip_ratio/low_mean": 0.0009099434473682777, + "clip_ratio/low_min": 1.806358341127634e-05, + "clip_ratio/region_mean": 0.001967021598829888, + "epoch": 4.447813411078717, + "grad_norm": 0.1287396252155304, + "learning_rate": 1e-06, + "loss": -0.0099, + "step": 432 + }, + { + "clip_ratio/high_max": 0.0025009670280269347, + "clip_ratio/high_mean": 0.0009114547774515813, + "clip_ratio/low_mean": 0.0008759734300838318, + "clip_ratio/low_min": 5.6137723731808364e-05, + "clip_ratio/region_mean": 0.0017874281838885508, + "epoch": 4.457142857142857, + "grad_norm": 0.12383487075567245, + "learning_rate": 1e-06, + "loss": 0.0088, + "step": 433 + }, + { + "clip_ratio/high_max": 0.0027183496567886323, + "clip_ratio/high_mean": 0.0012200498313177377, + "clip_ratio/low_mean": 0.0010797456052387133, + "clip_ratio/low_min": 6.000670327921398e-05, + "clip_ratio/region_mean": 0.002299795414728578, + "epoch": 4.466472303206997, + "grad_norm": 0.1346307098865509, + "learning_rate": 1e-06, + "loss": -0.0343, + "step": 434 + }, + { + "clip_ratio/high_max": 0.0023476571586797945, + "clip_ratio/high_mean": 0.0009690739752841182, + "clip_ratio/low_mean": 0.0009940544059645617, + "clip_ratio/low_min": 1.4269406165112741e-05, + "clip_ratio/region_mean": 0.001963128408533521, + "epoch": 4.475801749271137, + "grad_norm": 0.11996542662382126, + "learning_rate": 1e-06, + "loss": -0.0017, + "step": 435 + }, + { + "clip_ratio/high_max": 0.002613077071146108, + "clip_ratio/high_mean": 0.0009575607000442687, + "clip_ratio/low_mean": 0.0010433072020532563, + "clip_ratio/low_min": 2.778395173663739e-05, + "clip_ratio/region_mean": 0.002000867942115292, + "epoch": 4.485131195335277, + "grad_norm": 0.1359536349773407, + "learning_rate": 1e-06, + "loss": 0.0155, + "step": 436 + }, + { + "clip_ratio/high_max": 0.0025177682618959807, + "clip_ratio/high_mean": 0.001103411090298323, + "clip_ratio/low_mean": 0.0010170936766371597, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002120504781487398, + "epoch": 4.494460641399417, + "grad_norm": 0.1333197057247162, + "learning_rate": 1e-06, + "loss": 0.0116, + "step": 437 + }, + { + "clip_ratio/high_max": 0.002562261142884381, + "clip_ratio/high_mean": 0.0010776235758385155, + "clip_ratio/low_mean": 0.000865392614286975, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001943016228324268, + "epoch": 4.503790087463557, + "grad_norm": 0.11565488576889038, + "learning_rate": 1e-06, + "loss": -0.0177, + "step": 438 + }, + { + "clip_ratio/high_max": 0.0025387959540239535, + "clip_ratio/high_mean": 0.00101970440846344, + "clip_ratio/low_mean": 0.0008223590139095904, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018420633932691999, + "epoch": 4.513119533527696, + "grad_norm": 0.11977821588516235, + "learning_rate": 1e-06, + "loss": -0.0517, + "step": 439 + }, + { + "clip_ratio/high_max": 0.0027230137056903914, + "clip_ratio/high_mean": 0.0010951757521979744, + "clip_ratio/low_mean": 0.0008794847781246062, + "clip_ratio/low_min": 4.546409400063567e-05, + "clip_ratio/region_mean": 0.0019746605175896548, + "epoch": 4.522448979591837, + "grad_norm": 0.12791550159454346, + "learning_rate": 1e-06, + "loss": 0.0017, + "step": 440 + }, + { + "clip_ratio/high_max": 0.002204522388637997, + "clip_ratio/high_mean": 0.0009462918096687645, + "clip_ratio/low_mean": 0.0009360012409160845, + "clip_ratio/low_min": 6.7650882556336e-05, + "clip_ratio/region_mean": 0.0018822930651367642, + "epoch": 4.531778425655976, + "grad_norm": 0.12334809452295303, + "learning_rate": 1e-06, + "loss": 0.0424, + "step": 441 + }, + { + "clip_ratio/high_max": 0.0024424805378657766, + "clip_ratio/high_mean": 0.0010569582154857926, + "clip_ratio/low_mean": 0.00098892788264493, + "clip_ratio/low_min": 0.00017586808553460287, + "clip_ratio/region_mean": 0.0020458860672079027, + "epoch": 4.541107871720117, + "grad_norm": 0.1356218457221985, + "learning_rate": 1e-06, + "loss": -0.0341, + "step": 442 + }, + { + "clip_ratio/high_max": 0.002113778260536492, + "clip_ratio/high_mean": 0.000924423870856117, + "clip_ratio/low_mean": 0.0011204129204998026, + "clip_ratio/low_min": 4.631707270164043e-05, + "clip_ratio/region_mean": 0.0020448367940844037, + "epoch": 4.550437317784256, + "grad_norm": 0.13238206505775452, + "learning_rate": 1e-06, + "loss": 0.0154, + "step": 443 + }, + { + "clip_ratio/high_max": 0.00255790411029011, + "clip_ratio/high_mean": 0.0010612423939164728, + "clip_ratio/low_mean": 0.0010988476660713786, + "clip_ratio/low_min": 0.00015866332523728488, + "clip_ratio/region_mean": 0.002160090058168862, + "epoch": 4.559766763848397, + "grad_norm": 0.13470828533172607, + "learning_rate": 1e-06, + "loss": 0.0117, + "step": 444 + }, + { + "clip_ratio/high_max": 0.0025511191561236046, + "clip_ratio/high_mean": 0.0010549079743213952, + "clip_ratio/low_mean": 0.00101842683943687, + "clip_ratio/low_min": 5.866337778570596e-05, + "clip_ratio/region_mean": 0.002073334762826562, + "epoch": 4.569096209912536, + "grad_norm": 0.13952285051345825, + "learning_rate": 1e-06, + "loss": -0.0541, + "step": 445 + }, + { + "clip_ratio/high_max": 0.002576140424935147, + "clip_ratio/high_mean": 0.0010571445636742283, + "clip_ratio/low_mean": 0.0009249123468180187, + "clip_ratio/low_min": 6.193603894644184e-05, + "clip_ratio/region_mean": 0.001982056928682141, + "epoch": 4.578425655976677, + "grad_norm": 0.13626143336296082, + "learning_rate": 1e-06, + "loss": -0.0096, + "step": 446 + }, + { + "clip_ratio/high_max": 0.0025502821008558385, + "clip_ratio/high_mean": 0.0010889160766964778, + "clip_ratio/low_mean": 0.0011650592969090212, + "clip_ratio/low_min": 4.91139835503418e-05, + "clip_ratio/region_mean": 0.0022539753481396474, + "epoch": 4.587755102040816, + "grad_norm": 0.13684609532356262, + "learning_rate": 1e-06, + "loss": 0.034, + "step": 447 + }, + { + "clip_ratio/high_max": 0.002484923774318304, + "clip_ratio/high_mean": 0.0011634185539151076, + "clip_ratio/low_mean": 0.0010489358355698641, + "clip_ratio/low_min": 9.727147880767006e-05, + "clip_ratio/region_mean": 0.0022123544331407174, + "epoch": 4.597084548104956, + "grad_norm": 0.14760440587997437, + "learning_rate": 1e-06, + "loss": -0.0015, + "step": 448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0335170200892857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4081.0, + "completions/mean_length": 668.672119140625, + "completions/mean_terminated_length": 549.8145141601562, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 4.606413994169096, + "grad_norm": 0.12598927319049835, + "learning_rate": 1e-06, + "loss": -0.0079, + "num_tokens": 277791545.0, + "reward": 0.598946750164032, + "reward_std": 0.18085338175296783, + "rewards/simpleverify_reward/mean": 0.5989466905593872, + "rewards/simpleverify_reward/std": 0.4901203513145447, + "step": 449 + }, + { + "clip_ratio/high_max": 0.0017125042431871407, + "clip_ratio/high_mean": 0.0007223903448903002, + "clip_ratio/low_mean": 0.0006181180142448284, + "clip_ratio/low_min": 2.3982824131962843e-05, + "clip_ratio/region_mean": 0.0013405084027908742, + "epoch": 4.615743440233236, + "grad_norm": 0.12622013688087463, + "learning_rate": 1e-06, + "loss": 0.0176, + "step": 450 + }, + { + "clip_ratio/high_max": 0.002197796042310074, + "clip_ratio/high_mean": 0.0008766838600422489, + "clip_ratio/low_mean": 0.0004888315133939614, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013655154180014506, + "epoch": 4.625072886297376, + "grad_norm": 0.13169193267822266, + "learning_rate": 1e-06, + "loss": -0.0409, + "step": 451 + }, + { + "clip_ratio/high_max": 0.001849649994255742, + "clip_ratio/high_mean": 0.0008005681011127308, + "clip_ratio/low_mean": 0.000547487103176536, + "clip_ratio/low_min": 2.8700626899080817e-05, + "clip_ratio/region_mean": 0.0013480551751854364, + "epoch": 4.634402332361516, + "grad_norm": 0.1239888146519661, + "learning_rate": 1e-06, + "loss": 0.0058, + "step": 452 + }, + { + "clip_ratio/high_max": 0.0020362655450298917, + "clip_ratio/high_mean": 0.0008718002318346407, + "clip_ratio/low_mean": 0.0006089451840125548, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014807453808316495, + "epoch": 4.643731778425656, + "grad_norm": 0.1250065714120865, + "learning_rate": 1e-06, + "loss": -0.0606, + "step": 453 + }, + { + "clip_ratio/high_max": 0.002317130725714378, + "clip_ratio/high_mean": 0.0008930037583922967, + "clip_ratio/low_mean": 0.0005384467485782807, + "clip_ratio/low_min": 5.730386692448519e-05, + "clip_ratio/region_mean": 0.0014314504660433158, + "epoch": 4.653061224489796, + "grad_norm": 0.12810203433036804, + "learning_rate": 1e-06, + "loss": -0.0191, + "step": 454 + }, + { + "clip_ratio/high_max": 0.0019615373894339427, + "clip_ratio/high_mean": 0.0008948678896558704, + "clip_ratio/low_mean": 0.0007931599084258778, + "clip_ratio/low_min": 2.9082890250720084e-05, + "clip_ratio/region_mean": 0.0016880278126336634, + "epoch": 4.662390670553936, + "grad_norm": 0.12833471596240997, + "learning_rate": 1e-06, + "loss": -0.0417, + "step": 455 + }, + { + "clip_ratio/high_max": 0.002077327713777777, + "clip_ratio/high_mean": 0.0008009091206986341, + "clip_ratio/low_mean": 0.0008868475961207878, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001687756710452959, + "epoch": 4.671720116618076, + "grad_norm": 0.12376552075147629, + "learning_rate": 1e-06, + "loss": -0.0187, + "step": 456 + }, + { + "clip_ratio/high_max": 0.002086305736156646, + "clip_ratio/high_mean": 0.000859870402564411, + "clip_ratio/low_mean": 0.0008675882581883343, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017274586898565758, + "epoch": 4.681049562682215, + "grad_norm": 0.12905630469322205, + "learning_rate": 1e-06, + "loss": -0.0077, + "step": 457 + }, + { + "clip_ratio/high_max": 0.001968920205399627, + "clip_ratio/high_mean": 0.0007975597563927295, + "clip_ratio/low_mean": 0.0008436562675342429, + "clip_ratio/low_min": 7.116145025065634e-05, + "clip_ratio/region_mean": 0.0016412159930041526, + "epoch": 4.690379008746356, + "grad_norm": 0.1319509744644165, + "learning_rate": 1e-06, + "loss": 0.0173, + "step": 458 + }, + { + "clip_ratio/high_max": 0.002110439232637873, + "clip_ratio/high_mean": 0.0009003827472042758, + "clip_ratio/low_mean": 0.0008657902708364418, + "clip_ratio/low_min": 3.33158886860474e-05, + "clip_ratio/region_mean": 0.0017661730453255586, + "epoch": 4.699708454810495, + "grad_norm": 0.12854984402656555, + "learning_rate": 1e-06, + "loss": 0.0002, + "step": 459 + }, + { + "clip_ratio/high_max": 0.002188599086366594, + "clip_ratio/high_mean": 0.0008464736656605965, + "clip_ratio/low_mean": 0.000951001449720934, + "clip_ratio/low_min": 0.00011905482642760035, + "clip_ratio/region_mean": 0.0017974751317524351, + "epoch": 4.709037900874636, + "grad_norm": 0.1228955015540123, + "learning_rate": 1e-06, + "loss": 0.0206, + "step": 460 + }, + { + "clip_ratio/high_max": 0.002151340391719714, + "clip_ratio/high_mean": 0.0008006596581253689, + "clip_ratio/low_mean": 0.0008776649119681679, + "clip_ratio/low_min": 0.00011294292562524788, + "clip_ratio/region_mean": 0.0016783245373517275, + "epoch": 4.718367346938775, + "grad_norm": 0.1404651403427124, + "learning_rate": 1e-06, + "loss": 0.0247, + "step": 461 + }, + { + "clip_ratio/high_max": 0.0021559811357292347, + "clip_ratio/high_mean": 0.0008401388895435957, + "clip_ratio/low_mean": 0.0007999353802006226, + "clip_ratio/low_min": 2.0051331375725567e-05, + "clip_ratio/region_mean": 0.0016400742897531018, + "epoch": 4.727696793002916, + "grad_norm": 0.12330736964941025, + "learning_rate": 1e-06, + "loss": -0.0252, + "step": 462 + }, + { + "clip_ratio/high_max": 0.002180338080506772, + "clip_ratio/high_mean": 0.0010245344492432196, + "clip_ratio/low_mean": 0.0010881009948207065, + "clip_ratio/low_min": 3.4293552744202316e-05, + "clip_ratio/region_mean": 0.0021126354404259473, + "epoch": 4.737026239067055, + "grad_norm": 0.14122845232486725, + "learning_rate": 1e-06, + "loss": -0.0143, + "step": 463 + }, + { + "clip_ratio/high_max": 0.001972054633370135, + "clip_ratio/high_mean": 0.0007304179962375201, + "clip_ratio/low_mean": 0.0009653888482716866, + "clip_ratio/low_min": 5.0627086238819174e-05, + "clip_ratio/region_mean": 0.0016958068299572915, + "epoch": 4.746355685131196, + "grad_norm": 0.12829378247261047, + "learning_rate": 1e-06, + "loss": 0.0416, + "step": 464 + }, + { + "clip_ratio/high_max": 0.002340914521482773, + "clip_ratio/high_mean": 0.0009467333129578037, + "clip_ratio/low_mean": 0.001042510961269727, + "clip_ratio/low_min": 0.00013667773419001605, + "clip_ratio/region_mean": 0.0019892442505806684, + "epoch": 4.755685131195335, + "grad_norm": 0.12634426355361938, + "learning_rate": 1e-06, + "loss": 0.0085, + "step": 465 + }, + { + "clip_ratio/high_max": 0.0024798499362077564, + "clip_ratio/high_mean": 0.0008589172230131226, + "clip_ratio/low_mean": 0.0008599830016464693, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017189002537634224, + "epoch": 4.765014577259475, + "grad_norm": 0.1335812211036682, + "learning_rate": 1e-06, + "loss": 0.0035, + "step": 466 + }, + { + "clip_ratio/high_max": 0.0020122003843425773, + "clip_ratio/high_mean": 0.0008412773731834022, + "clip_ratio/low_mean": 0.001073557401468861, + "clip_ratio/low_min": 5.362647607398685e-05, + "clip_ratio/region_mean": 0.0019148348001181148, + "epoch": 4.774344023323615, + "grad_norm": 0.13003897666931152, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 467 + }, + { + "clip_ratio/high_max": 0.0024020030941755977, + "clip_ratio/high_mean": 0.0009109533384616952, + "clip_ratio/low_mean": 0.0010316131920262706, + "clip_ratio/low_min": 9.37678760237759e-05, + "clip_ratio/region_mean": 0.001942566508660093, + "epoch": 4.783673469387755, + "grad_norm": 0.24173766374588013, + "learning_rate": 1e-06, + "loss": 0.0193, + "step": 468 + }, + { + "clip_ratio/high_max": 0.002175915622501634, + "clip_ratio/high_mean": 0.0008120271522784606, + "clip_ratio/low_mean": 0.0011344612612447236, + "clip_ratio/low_min": 0.00013007072539039655, + "clip_ratio/region_mean": 0.0019464884753688239, + "epoch": 4.793002915451895, + "grad_norm": 0.12008558213710785, + "learning_rate": 1e-06, + "loss": 0.0567, + "step": 469 + }, + { + "clip_ratio/high_max": 0.0027870368503499776, + "clip_ratio/high_mean": 0.0010877357472054427, + "clip_ratio/low_mean": 0.0009601725487300428, + "clip_ratio/low_min": 2.267368108732626e-05, + "clip_ratio/region_mean": 0.002047908303211443, + "epoch": 4.802332361516035, + "grad_norm": 0.13638655841350555, + "learning_rate": 1e-06, + "loss": -0.0196, + "step": 470 + }, + { + "clip_ratio/high_max": 0.0022125025425339118, + "clip_ratio/high_mean": 0.000991207820334239, + "clip_ratio/low_mean": 0.0009885704912449, + "clip_ratio/low_min": 2.6331716071581468e-05, + "clip_ratio/region_mean": 0.001979778360691853, + "epoch": 4.811661807580175, + "grad_norm": 0.1521136313676834, + "learning_rate": 1e-06, + "loss": -0.0188, + "step": 471 + }, + { + "clip_ratio/high_max": 0.002700241580896545, + "clip_ratio/high_mean": 0.00097161745725316, + "clip_ratio/low_mean": 0.0010361361401010072, + "clip_ratio/low_min": 8.029135642573237e-05, + "clip_ratio/region_mean": 0.0020077536028111354, + "epoch": 4.820991253644315, + "grad_norm": 0.12461855262517929, + "learning_rate": 1e-06, + "loss": 0.0036, + "step": 472 + }, + { + "clip_ratio/high_max": 0.0024424837101832964, + "clip_ratio/high_mean": 0.0008810730960249202, + "clip_ratio/low_mean": 0.0010888123833865393, + "clip_ratio/low_min": 9.56364983721869e-05, + "clip_ratio/region_mean": 0.0019698855030583218, + "epoch": 4.830320699708455, + "grad_norm": 0.12171190232038498, + "learning_rate": 1e-06, + "loss": 0.0292, + "step": 473 + }, + { + "clip_ratio/high_max": 0.0026358924151281826, + "clip_ratio/high_mean": 0.0009962538479157956, + "clip_ratio/low_mean": 0.0009987189896492055, + "clip_ratio/low_min": 0.00012843935201090062, + "clip_ratio/region_mean": 0.0019949728448409587, + "epoch": 4.839650145772595, + "grad_norm": 0.1612969934940338, + "learning_rate": 1e-06, + "loss": 0.018, + "step": 474 + }, + { + "clip_ratio/high_max": 0.002331630268599838, + "clip_ratio/high_mean": 0.0009881180340016726, + "clip_ratio/low_mean": 0.0009393481104780221, + "clip_ratio/low_min": 6.486190250143409e-05, + "clip_ratio/region_mean": 0.0019274661462986842, + "epoch": 4.848979591836734, + "grad_norm": 0.12759144604206085, + "learning_rate": 1e-06, + "loss": -0.0316, + "step": 475 + }, + { + "clip_ratio/high_max": 0.002235203282907605, + "clip_ratio/high_mean": 0.0009293396842622315, + "clip_ratio/low_mean": 0.000855879417485994, + "clip_ratio/low_min": 4.241055648890324e-05, + "clip_ratio/region_mean": 0.001785219086741563, + "epoch": 4.858309037900875, + "grad_norm": 0.12324231117963791, + "learning_rate": 1e-06, + "loss": -0.018, + "step": 476 + }, + { + "clip_ratio/high_max": 0.0026894152324530296, + "clip_ratio/high_mean": 0.0010632195844664238, + "clip_ratio/low_mean": 0.0009725290656206198, + "clip_ratio/low_min": 4.775536945089698e-05, + "clip_ratio/region_mean": 0.0020357486355351284, + "epoch": 4.867638483965014, + "grad_norm": 0.13588637113571167, + "learning_rate": 1e-06, + "loss": -0.0654, + "step": 477 + }, + { + "clip_ratio/high_max": 0.002452754713885952, + "clip_ratio/high_mean": 0.0009118791931541637, + "clip_ratio/low_mean": 0.0009465403363719815, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018584195204311982, + "epoch": 4.876967930029155, + "grad_norm": 0.1308816522359848, + "learning_rate": 1e-06, + "loss": 0.0089, + "step": 478 + }, + { + "clip_ratio/high_max": 0.002619025945023168, + "clip_ratio/high_mean": 0.0010607020703901071, + "clip_ratio/low_mean": 0.001078895766113419, + "clip_ratio/low_min": 6.010681318002753e-05, + "clip_ratio/region_mean": 0.002139597861969378, + "epoch": 4.886297376093294, + "grad_norm": 0.15502747893333435, + "learning_rate": 1e-06, + "loss": 0.0054, + "step": 479 + }, + { + "clip_ratio/high_max": 0.002136282102583209, + "clip_ratio/high_mean": 0.0008923701443563914, + "clip_ratio/low_mean": 0.0011423797222960275, + "clip_ratio/low_min": 9.303533170168521e-05, + "clip_ratio/region_mean": 0.0020347498430055566, + "epoch": 4.895626822157435, + "grad_norm": 0.11837761849164963, + "learning_rate": 1e-06, + "loss": 0.0086, + "step": 480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0362723214285714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4086.0, + "completions/mean_length": 682.9603271484375, + "completions/mean_terminated_length": 554.501953125, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 5.0093294460641395, + "grad_norm": 0.13240015506744385, + "learning_rate": 1e-06, + "loss": 0.0043, + "num_tokens": 295939351.0, + "reward": 0.6090611219406128, + "reward_std": 0.18000535666942596, + "rewards/simpleverify_reward/mean": 0.6090611219406128, + "rewards/simpleverify_reward/std": 0.4879692494869232, + "step": 481 + }, + { + "clip_ratio/high_max": 0.0020576376482495107, + "clip_ratio/high_mean": 0.0008079357867245562, + "clip_ratio/low_mean": 0.0005491220399562735, + "clip_ratio/low_min": 2.6662172786018346e-05, + "clip_ratio/region_mean": 0.0013570578557846602, + "epoch": 5.01865889212828, + "grad_norm": 0.1365615427494049, + "learning_rate": 1e-06, + "loss": -0.0131, + "step": 482 + }, + { + "clip_ratio/high_max": 0.002011826218222268, + "clip_ratio/high_mean": 0.0008923103523557074, + "clip_ratio/low_mean": 0.0006162944546304061, + "clip_ratio/low_min": 2.3742658413539175e-05, + "clip_ratio/region_mean": 0.001508604793343693, + "epoch": 5.0279883381924195, + "grad_norm": 0.13470865786075592, + "learning_rate": 1e-06, + "loss": 0.0039, + "step": 483 + }, + { + "clip_ratio/high_max": 0.002328693233721424, + "clip_ratio/high_mean": 0.0010221383945463458, + "clip_ratio/low_mean": 0.0005530897924472811, + "clip_ratio/low_min": 4.729193460661918e-05, + "clip_ratio/region_mean": 0.0015752281797176693, + "epoch": 5.03731778425656, + "grad_norm": 0.14696210622787476, + "learning_rate": 1e-06, + "loss": -0.0645, + "step": 484 + }, + { + "clip_ratio/high_max": 0.0020421258959686384, + "clip_ratio/high_mean": 0.0007934162676974665, + "clip_ratio/low_mean": 0.0005984390418234398, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013918552940594964, + "epoch": 5.0466472303206995, + "grad_norm": 0.12379525601863861, + "learning_rate": 1e-06, + "loss": 0.0165, + "step": 485 + }, + { + "clip_ratio/high_max": 0.001749827843013918, + "clip_ratio/high_mean": 0.000792051461758092, + "clip_ratio/low_mean": 0.0007013388185441727, + "clip_ratio/low_min": 3.5201150240027346e-05, + "clip_ratio/region_mean": 0.0014933902530174237, + "epoch": 5.05597667638484, + "grad_norm": 0.1355205923318863, + "learning_rate": 1e-06, + "loss": -0.0002, + "step": 486 + }, + { + "clip_ratio/high_max": 0.0022914472137927078, + "clip_ratio/high_mean": 0.0009888761169349891, + "clip_ratio/low_mean": 0.0005872124193047057, + "clip_ratio/low_min": 4.3916755203099456e-05, + "clip_ratio/region_mean": 0.0015760885253257584, + "epoch": 5.0653061224489795, + "grad_norm": 0.15061749517917633, + "learning_rate": 1e-06, + "loss": 0.0022, + "step": 487 + }, + { + "clip_ratio/high_max": 0.002541314008340123, + "clip_ratio/high_mean": 0.0008550682805434917, + "clip_ratio/low_mean": 0.0008633553989056963, + "clip_ratio/low_min": 3.299023592262529e-05, + "clip_ratio/region_mean": 0.001718423667625757, + "epoch": 5.07463556851312, + "grad_norm": 0.12575991451740265, + "learning_rate": 1e-06, + "loss": 0.0186, + "step": 488 + }, + { + "clip_ratio/high_max": 0.0019343453641340602, + "clip_ratio/high_mean": 0.0008475117610942107, + "clip_ratio/low_mean": 0.0006883281857881229, + "clip_ratio/low_min": 1.659585723245982e-05, + "clip_ratio/region_mean": 0.001535839939606376, + "epoch": 5.0839650145772595, + "grad_norm": 0.10920840501785278, + "learning_rate": 1e-06, + "loss": -0.0217, + "step": 489 + }, + { + "clip_ratio/high_max": 0.001879649054899346, + "clip_ratio/high_mean": 0.0008659716040710919, + "clip_ratio/low_mean": 0.0006675041577182128, + "clip_ratio/low_min": 9.835097171162488e-05, + "clip_ratio/region_mean": 0.0015334757990785874, + "epoch": 5.093294460641399, + "grad_norm": 0.1407405585050583, + "learning_rate": 1e-06, + "loss": -0.0239, + "step": 490 + }, + { + "clip_ratio/high_max": 0.002007840506848879, + "clip_ratio/high_mean": 0.0008585395462432643, + "clip_ratio/low_mean": 0.0006898595765960636, + "clip_ratio/low_min": 1.3855021279596258e-05, + "clip_ratio/region_mean": 0.0015483990900975186, + "epoch": 5.1026239067055394, + "grad_norm": 0.13352954387664795, + "learning_rate": 1e-06, + "loss": 0.0022, + "step": 491 + }, + { + "clip_ratio/high_max": 0.0019643480045488104, + "clip_ratio/high_mean": 0.0008266790264315205, + "clip_ratio/low_mean": 0.0008368439093828783, + "clip_ratio/low_min": 3.501040828268742e-05, + "clip_ratio/region_mean": 0.0016635229330859147, + "epoch": 5.111953352769679, + "grad_norm": 0.13257625699043274, + "learning_rate": 1e-06, + "loss": -0.001, + "step": 492 + }, + { + "clip_ratio/high_max": 0.0018863148870877922, + "clip_ratio/high_mean": 0.0007869529854360735, + "clip_ratio/low_mean": 0.000801651411165949, + "clip_ratio/low_min": 1.3249947187432554e-05, + "clip_ratio/region_mean": 0.0015886044202488847, + "epoch": 5.121282798833819, + "grad_norm": 0.11635281145572662, + "learning_rate": 1e-06, + "loss": -0.0055, + "step": 493 + }, + { + "clip_ratio/high_max": 0.002032311327639036, + "clip_ratio/high_mean": 0.000924006675631972, + "clip_ratio/low_mean": 0.0007529759113822365, + "clip_ratio/low_min": 6.244476026040502e-05, + "clip_ratio/region_mean": 0.0016769826033851132, + "epoch": 5.130612244897959, + "grad_norm": 0.1377553641796112, + "learning_rate": 1e-06, + "loss": -0.0033, + "step": 494 + }, + { + "clip_ratio/high_max": 0.001949135312315775, + "clip_ratio/high_mean": 0.0009312449474236928, + "clip_ratio/low_mean": 0.0008835917869873811, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018148367162211798, + "epoch": 5.139941690962099, + "grad_norm": 0.13948386907577515, + "learning_rate": 1e-06, + "loss": -0.0362, + "step": 495 + }, + { + "clip_ratio/high_max": 0.002403590398898814, + "clip_ratio/high_mean": 0.0009901507073664106, + "clip_ratio/low_mean": 0.0007651728319615358, + "clip_ratio/low_min": 5.3115289119887166e-05, + "clip_ratio/region_mean": 0.0017553235447849147, + "epoch": 5.149271137026239, + "grad_norm": 0.12421394139528275, + "learning_rate": 1e-06, + "loss": -0.0375, + "step": 496 + }, + { + "clip_ratio/high_max": 0.0021713016758440062, + "clip_ratio/high_mean": 0.0009290425186918583, + "clip_ratio/low_mean": 0.0010167801592615433, + "clip_ratio/low_min": 6.576361283805454e-05, + "clip_ratio/region_mean": 0.0019458227034192532, + "epoch": 5.158600583090379, + "grad_norm": 0.12457162886857986, + "learning_rate": 1e-06, + "loss": 0.0328, + "step": 497 + }, + { + "clip_ratio/high_max": 0.0022624364792136475, + "clip_ratio/high_mean": 0.0009392213578394148, + "clip_ratio/low_mean": 0.001059698381141061, + "clip_ratio/low_min": 9.89463560472359e-05, + "clip_ratio/region_mean": 0.001998919771722285, + "epoch": 5.167930029154519, + "grad_norm": 0.1343548446893692, + "learning_rate": 1e-06, + "loss": 0.0023, + "step": 498 + }, + { + "clip_ratio/high_max": 0.0019533201193553396, + "clip_ratio/high_mean": 0.0008645405050629051, + "clip_ratio/low_mean": 0.0009914670808939263, + "clip_ratio/low_min": 4.188778984826058e-05, + "clip_ratio/region_mean": 0.0018560075986897573, + "epoch": 5.1772594752186585, + "grad_norm": 0.13913746178150177, + "learning_rate": 1e-06, + "loss": 0.0243, + "step": 499 + }, + { + "clip_ratio/high_max": 0.0020386755204526708, + "clip_ratio/high_mean": 0.0009038800017151516, + "clip_ratio/low_mean": 0.000914753754841513, + "clip_ratio/low_min": 1.6573852917645127e-05, + "clip_ratio/region_mean": 0.001818633776565548, + "epoch": 5.186588921282799, + "grad_norm": 0.12314088642597198, + "learning_rate": 1e-06, + "loss": 0.0028, + "step": 500 + }, + { + "clip_ratio/high_max": 0.0024852408678270876, + "clip_ratio/high_mean": 0.0010035527102445485, + "clip_ratio/low_mean": 0.0008701959159225225, + "clip_ratio/low_min": 2.7864467483595945e-05, + "clip_ratio/region_mean": 0.0018737486316240393, + "epoch": 5.1959183673469385, + "grad_norm": 0.1170763224363327, + "learning_rate": 1e-06, + "loss": -0.0443, + "step": 501 + }, + { + "clip_ratio/high_max": 0.0024521528321201913, + "clip_ratio/high_mean": 0.0009978024208976422, + "clip_ratio/low_mean": 0.0010160780257137958, + "clip_ratio/low_min": 6.657757512584794e-05, + "clip_ratio/region_mean": 0.0020138804757152684, + "epoch": 5.205247813411079, + "grad_norm": 0.1256285160779953, + "learning_rate": 1e-06, + "loss": 0.0035, + "step": 502 + }, + { + "clip_ratio/high_max": 0.002453805376717355, + "clip_ratio/high_mean": 0.001000544725684449, + "clip_ratio/low_mean": 0.0010675977282517124, + "clip_ratio/low_min": 0.00010594768718874548, + "clip_ratio/region_mean": 0.002068142421194352, + "epoch": 5.214577259475218, + "grad_norm": 0.12164364010095596, + "learning_rate": 1e-06, + "loss": 0.0276, + "step": 503 + }, + { + "clip_ratio/high_max": 0.0019454803150438238, + "clip_ratio/high_mean": 0.0009572258750267792, + "clip_ratio/low_mean": 0.0011246323301747907, + "clip_ratio/low_min": 7.069775529089384e-05, + "clip_ratio/region_mean": 0.0020818581906496547, + "epoch": 5.223906705539359, + "grad_norm": 0.124293752014637, + "learning_rate": 1e-06, + "loss": -0.0012, + "step": 504 + }, + { + "clip_ratio/high_max": 0.0023654099568375386, + "clip_ratio/high_mean": 0.0010104378598043695, + "clip_ratio/low_mean": 0.000990252610790776, + "clip_ratio/low_min": 7.599846685479861e-05, + "clip_ratio/region_mean": 0.002000690459681209, + "epoch": 5.233236151603498, + "grad_norm": 0.11535981297492981, + "learning_rate": 1e-06, + "loss": 0.0119, + "step": 505 + }, + { + "clip_ratio/high_max": 0.002256822554045357, + "clip_ratio/high_mean": 0.0010653167046257295, + "clip_ratio/low_mean": 0.0009366795766254654, + "clip_ratio/low_min": 5.9150375818717293e-05, + "clip_ratio/region_mean": 0.0020019962830701843, + "epoch": 5.242565597667639, + "grad_norm": 0.14496639370918274, + "learning_rate": 1e-06, + "loss": -0.0304, + "step": 506 + }, + { + "clip_ratio/high_max": 0.0024256028191302903, + "clip_ratio/high_mean": 0.0010171714257012354, + "clip_ratio/low_mean": 0.0010023805261880625, + "clip_ratio/low_min": 7.545882908743806e-05, + "clip_ratio/region_mean": 0.002019551939156372, + "epoch": 5.251895043731778, + "grad_norm": 0.11995477974414825, + "learning_rate": 1e-06, + "loss": -0.003, + "step": 507 + }, + { + "clip_ratio/high_max": 0.002414313275949098, + "clip_ratio/high_mean": 0.0010286249853379559, + "clip_ratio/low_mean": 0.001206538549013203, + "clip_ratio/low_min": 7.811899013177026e-05, + "clip_ratio/region_mean": 0.002235163548903074, + "epoch": 5.261224489795918, + "grad_norm": 0.13626430928707123, + "learning_rate": 1e-06, + "loss": 0.0301, + "step": 508 + }, + { + "clip_ratio/high_max": 0.0028657487346208654, + "clip_ratio/high_mean": 0.001197317789774388, + "clip_ratio/low_mean": 0.0010170679342991207, + "clip_ratio/low_min": 3.202979223715374e-05, + "clip_ratio/region_mean": 0.0022143856695038266, + "epoch": 5.270553935860058, + "grad_norm": 0.13054902851581573, + "learning_rate": 1e-06, + "loss": -0.0248, + "step": 509 + }, + { + "clip_ratio/high_max": 0.0024697585904505104, + "clip_ratio/high_mean": 0.0011102709831902757, + "clip_ratio/low_mean": 0.0008922044835344423, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020024753976031207, + "epoch": 5.279883381924198, + "grad_norm": 0.13530661165714264, + "learning_rate": 1e-06, + "loss": -0.0482, + "step": 510 + }, + { + "clip_ratio/high_max": 0.002492923813406378, + "clip_ratio/high_mean": 0.0010445061834616354, + "clip_ratio/low_mean": 0.00101987049310992, + "clip_ratio/low_min": 6.257984477997525e-05, + "clip_ratio/region_mean": 0.002064376720227301, + "epoch": 5.289212827988338, + "grad_norm": 0.13626393675804138, + "learning_rate": 1e-06, + "loss": -0.0071, + "step": 511 + }, + { + "clip_ratio/high_max": 0.00222903109897743, + "clip_ratio/high_mean": 0.0010143373947357759, + "clip_ratio/low_mean": 0.00103969967312878, + "clip_ratio/low_min": 3.111000478384085e-05, + "clip_ratio/region_mean": 0.0020540371260722168, + "epoch": 5.298542274052478, + "grad_norm": 0.12809443473815918, + "learning_rate": 1e-06, + "loss": -0.0203, + "step": 512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0391322544642857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 3971.0, + "completions/mean_length": 688.7654418945312, + "completions/mean_terminated_length": 550.0025024414062, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 5.307871720116618, + "grad_norm": 0.13560642302036285, + "learning_rate": 1e-06, + "loss": -0.012, + "num_tokens": 313967329.0, + "reward": 0.609375, + "reward_std": 0.175076425075531, + "rewards/simpleverify_reward/mean": 0.609375, + "rewards/simpleverify_reward/std": 0.48789897561073303, + "step": 513 + }, + { + "clip_ratio/high_max": 0.0017491583348601125, + "clip_ratio/high_mean": 0.0006491530384664657, + "clip_ratio/low_mean": 0.0005962008872302249, + "clip_ratio/low_min": 2.84956267933012e-05, + "clip_ratio/region_mean": 0.00124535392751568, + "epoch": 5.317201166180758, + "grad_norm": 0.12514084577560425, + "learning_rate": 1e-06, + "loss": 0.014, + "step": 514 + }, + { + "clip_ratio/high_max": 0.002272596779221203, + "clip_ratio/high_mean": 0.0009474223224970046, + "clip_ratio/low_mean": 0.0004419933588906133, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001389415680023376, + "epoch": 5.326530612244898, + "grad_norm": 0.1302099972963333, + "learning_rate": 1e-06, + "loss": -0.0756, + "step": 515 + }, + { + "clip_ratio/high_max": 0.001897988378914306, + "clip_ratio/high_mean": 0.0007603123631270137, + "clip_ratio/low_mean": 0.0006991100162849762, + "clip_ratio/low_min": 6.483782635768875e-05, + "clip_ratio/region_mean": 0.0014594223830499686, + "epoch": 5.335860058309038, + "grad_norm": 0.13479861617088318, + "learning_rate": 1e-06, + "loss": -0.01, + "step": 516 + }, + { + "clip_ratio/high_max": 0.0019074701631325297, + "clip_ratio/high_mean": 0.0007458546442649094, + "clip_ratio/low_mean": 0.000588814371440094, + "clip_ratio/low_min": 4.012657063867664e-05, + "clip_ratio/region_mean": 0.0013346690211619716, + "epoch": 5.345189504373177, + "grad_norm": 0.12178914994001389, + "learning_rate": 1e-06, + "loss": -0.0463, + "step": 517 + }, + { + "clip_ratio/high_max": 0.002265087685373146, + "clip_ratio/high_mean": 0.0008659941595396958, + "clip_ratio/low_mean": 0.0005778917366114911, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014438858852372505, + "epoch": 5.354518950437318, + "grad_norm": 0.12470357865095139, + "learning_rate": 1e-06, + "loss": -0.0124, + "step": 518 + }, + { + "clip_ratio/high_max": 0.002135470200300915, + "clip_ratio/high_mean": 0.0007226648194773588, + "clip_ratio/low_mean": 0.0006959315414860612, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014185963809723035, + "epoch": 5.363848396501457, + "grad_norm": 0.12351685017347336, + "learning_rate": 1e-06, + "loss": 0.015, + "step": 519 + }, + { + "clip_ratio/high_max": 0.0020754274737555534, + "clip_ratio/high_mean": 0.0008861361166054849, + "clip_ratio/low_mean": 0.0008542330779164331, + "clip_ratio/low_min": 4.0734303183853626e-05, + "clip_ratio/region_mean": 0.0017403691599611193, + "epoch": 5.373177842565598, + "grad_norm": 0.14237327873706818, + "learning_rate": 1e-06, + "loss": -0.0142, + "step": 520 + }, + { + "clip_ratio/high_max": 0.0020025275480293203, + "clip_ratio/high_mean": 0.0007684946249355562, + "clip_ratio/low_mean": 0.0007583003880426986, + "clip_ratio/low_min": 8.179931410268182e-05, + "clip_ratio/region_mean": 0.0015267949966073502, + "epoch": 5.382507288629737, + "grad_norm": 0.1756371259689331, + "learning_rate": 1e-06, + "loss": 0.04, + "step": 521 + }, + { + "clip_ratio/high_max": 0.00238994440951501, + "clip_ratio/high_mean": 0.0008855019750626525, + "clip_ratio/low_mean": 0.000931519203732023, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018170211551478133, + "epoch": 5.391836734693878, + "grad_norm": 0.14375469088554382, + "learning_rate": 1e-06, + "loss": -0.0052, + "step": 522 + }, + { + "clip_ratio/high_max": 0.002366450549743604, + "clip_ratio/high_mean": 0.0009654382156440988, + "clip_ratio/low_mean": 0.0007401532984658843, + "clip_ratio/low_min": 1.9266337403678335e-05, + "clip_ratio/region_mean": 0.0017055915122909937, + "epoch": 5.401166180758017, + "grad_norm": 0.13315987586975098, + "learning_rate": 1e-06, + "loss": -0.0612, + "step": 523 + }, + { + "clip_ratio/high_max": 0.002003363460971741, + "clip_ratio/high_mean": 0.0007940236864669714, + "clip_ratio/low_mean": 0.0008930124040489318, + "clip_ratio/low_min": 1.4381040273292456e-05, + "clip_ratio/region_mean": 0.0016870360777829774, + "epoch": 5.410495626822158, + "grad_norm": 0.1370738297700882, + "learning_rate": 1e-06, + "loss": 0.0271, + "step": 524 + }, + { + "clip_ratio/high_max": 0.0018541953249950893, + "clip_ratio/high_mean": 0.000729202223737957, + "clip_ratio/low_mean": 0.0009568829300405923, + "clip_ratio/low_min": 0.00011523921057232656, + "clip_ratio/region_mean": 0.0016860851537785493, + "epoch": 5.419825072886297, + "grad_norm": 0.1281536966562271, + "learning_rate": 1e-06, + "loss": 0.0394, + "step": 525 + }, + { + "clip_ratio/high_max": 0.0020467235444812104, + "clip_ratio/high_mean": 0.0008318013860844076, + "clip_ratio/low_mean": 0.0007885420172897284, + "clip_ratio/low_min": 5.9679015976144e-05, + "clip_ratio/region_mean": 0.0016203433915507048, + "epoch": 5.429154518950437, + "grad_norm": 0.11285793036222458, + "learning_rate": 1e-06, + "loss": -0.01, + "step": 526 + }, + { + "clip_ratio/high_max": 0.0021223134936008137, + "clip_ratio/high_mean": 0.0008833946394588565, + "clip_ratio/low_mean": 0.0008984029500425095, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017817975822254084, + "epoch": 5.438483965014577, + "grad_norm": 0.12460539489984512, + "learning_rate": 1e-06, + "loss": 0.0105, + "step": 527 + }, + { + "clip_ratio/high_max": 0.001999462958337972, + "clip_ratio/high_mean": 0.0008954146323958412, + "clip_ratio/low_mean": 0.0009278881589125376, + "clip_ratio/low_min": 2.8850068702013232e-05, + "clip_ratio/region_mean": 0.001823302738557686, + "epoch": 5.447813411078717, + "grad_norm": 0.13345134258270264, + "learning_rate": 1e-06, + "loss": -0.0419, + "step": 528 + }, + { + "clip_ratio/high_max": 0.002199254078732338, + "clip_ratio/high_mean": 0.0009600434696039883, + "clip_ratio/low_mean": 0.0008709008607183932, + "clip_ratio/low_min": 1.1042402547900565e-05, + "clip_ratio/region_mean": 0.0018309443039470352, + "epoch": 5.457142857142857, + "grad_norm": 0.13948243856430054, + "learning_rate": 1e-06, + "loss": -0.0244, + "step": 529 + }, + { + "clip_ratio/high_max": 0.002275763708894374, + "clip_ratio/high_mean": 0.0009221295531460783, + "clip_ratio/low_mean": 0.0006748406431142939, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015969702290021814, + "epoch": 5.466472303206997, + "grad_norm": 0.11956123262643814, + "learning_rate": 1e-06, + "loss": -0.0481, + "step": 530 + }, + { + "clip_ratio/high_max": 0.0022184816043591127, + "clip_ratio/high_mean": 0.000888157905137632, + "clip_ratio/low_mean": 0.0007502004336856771, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016383583315473516, + "epoch": 5.475801749271137, + "grad_norm": 0.11030435562133789, + "learning_rate": 1e-06, + "loss": -0.0435, + "step": 531 + }, + { + "clip_ratio/high_max": 0.0021036492253188044, + "clip_ratio/high_mean": 0.000867285640197224, + "clip_ratio/low_mean": 0.0007736313045825227, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016409169911639765, + "epoch": 5.485131195335277, + "grad_norm": 0.11707475781440735, + "learning_rate": 1e-06, + "loss": -0.0212, + "step": 532 + }, + { + "clip_ratio/high_max": 0.0020848766871495172, + "clip_ratio/high_mean": 0.000894044766027946, + "clip_ratio/low_mean": 0.0009081581538339378, + "clip_ratio/low_min": 9.204614980262704e-05, + "clip_ratio/region_mean": 0.0018022029689745978, + "epoch": 5.494460641399417, + "grad_norm": 0.14132405817508698, + "learning_rate": 1e-06, + "loss": -0.0265, + "step": 533 + }, + { + "clip_ratio/high_max": 0.002804819858283736, + "clip_ratio/high_mean": 0.001033475884469226, + "clip_ratio/low_mean": 0.0009524611705273855, + "clip_ratio/low_min": 1.6715699530323036e-05, + "clip_ratio/region_mean": 0.0019859370586345904, + "epoch": 5.503790087463557, + "grad_norm": 0.13796620070934296, + "learning_rate": 1e-06, + "loss": 0.0003, + "step": 534 + }, + { + "clip_ratio/high_max": 0.0021225897180556785, + "clip_ratio/high_mean": 0.0008588396703999024, + "clip_ratio/low_mean": 0.0008971254355856217, + "clip_ratio/low_min": 4.1293245885754004e-05, + "clip_ratio/region_mean": 0.0017559650805196725, + "epoch": 5.513119533527696, + "grad_norm": 0.12176398932933807, + "learning_rate": 1e-06, + "loss": 0.0177, + "step": 535 + }, + { + "clip_ratio/high_max": 0.0025111865616054274, + "clip_ratio/high_mean": 0.0010823341781360796, + "clip_ratio/low_mean": 0.0007295301929843845, + "clip_ratio/low_min": 4.5843520638300106e-05, + "clip_ratio/region_mean": 0.0018118642874469515, + "epoch": 5.522448979591837, + "grad_norm": 0.12989932298660278, + "learning_rate": 1e-06, + "loss": -0.0079, + "step": 536 + }, + { + "clip_ratio/high_max": 0.0026546081717242487, + "clip_ratio/high_mean": 0.001037176316458499, + "clip_ratio/low_mean": 0.0009605788654880598, + "clip_ratio/low_min": 1.3205155482864939e-05, + "clip_ratio/region_mean": 0.001997755200136453, + "epoch": 5.531778425655976, + "grad_norm": 0.13197563588619232, + "learning_rate": 1e-06, + "loss": -0.004, + "step": 537 + }, + { + "clip_ratio/high_max": 0.0022418158405344, + "clip_ratio/high_mean": 0.001036331923387479, + "clip_ratio/low_mean": 0.0008999032943393104, + "clip_ratio/low_min": 2.2045855075703003e-05, + "clip_ratio/region_mean": 0.001936235225002747, + "epoch": 5.541107871720117, + "grad_norm": 0.14139501750469208, + "learning_rate": 1e-06, + "loss": -0.0153, + "step": 538 + }, + { + "clip_ratio/high_max": 0.0024613587593194097, + "clip_ratio/high_mean": 0.0010359186217101524, + "clip_ratio/low_mean": 0.0009398598567713634, + "clip_ratio/low_min": 4.984726183465682e-05, + "clip_ratio/region_mean": 0.0019757784648390952, + "epoch": 5.550437317784256, + "grad_norm": 0.12041354924440384, + "learning_rate": 1e-06, + "loss": -0.0141, + "step": 539 + }, + { + "clip_ratio/high_max": 0.0025265402873628773, + "clip_ratio/high_mean": 0.0010248810413031606, + "clip_ratio/low_mean": 0.0009854290838120505, + "clip_ratio/low_min": 3.78825261577731e-05, + "clip_ratio/region_mean": 0.0020103101123822853, + "epoch": 5.559766763848397, + "grad_norm": 0.12725260853767395, + "learning_rate": 1e-06, + "loss": -0.0179, + "step": 540 + }, + { + "clip_ratio/high_max": 0.0021822889466420747, + "clip_ratio/high_mean": 0.0008952896350820083, + "clip_ratio/low_mean": 0.000962873557000421, + "clip_ratio/low_min": 8.405642256548163e-05, + "clip_ratio/region_mean": 0.0018581632321001962, + "epoch": 5.569096209912536, + "grad_norm": 0.13308624923229218, + "learning_rate": 1e-06, + "loss": -0.007, + "step": 541 + }, + { + "clip_ratio/high_max": 0.002624096829094924, + "clip_ratio/high_mean": 0.0011093954199168365, + "clip_ratio/low_mean": 0.001107957019485184, + "clip_ratio/low_min": 6.319619751593564e-05, + "clip_ratio/region_mean": 0.00221735244122101, + "epoch": 5.578425655976677, + "grad_norm": 0.13106843829154968, + "learning_rate": 1e-06, + "loss": 0.0057, + "step": 542 + }, + { + "clip_ratio/high_max": 0.002117255084158387, + "clip_ratio/high_mean": 0.0009360337826365139, + "clip_ratio/low_mean": 0.001085181789676426, + "clip_ratio/low_min": 0.00013736536129727028, + "clip_ratio/region_mean": 0.0020212155213812366, + "epoch": 5.587755102040816, + "grad_norm": 0.1337672919034958, + "learning_rate": 1e-06, + "loss": 0.0162, + "step": 543 + }, + { + "clip_ratio/high_max": 0.0020849234369961778, + "clip_ratio/high_mean": 0.0009420119222340873, + "clip_ratio/low_mean": 0.0009909714026434813, + "clip_ratio/low_min": 1.4351320714922622e-05, + "clip_ratio/region_mean": 0.0019329833594383672, + "epoch": 5.597084548104956, + "grad_norm": 0.1221359446644783, + "learning_rate": 1e-06, + "loss": -0.0048, + "step": 544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0350167410714286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4092.0, + "completions/mean_length": 671.4923706054688, + "completions/mean_terminated_length": 547.2257690429688, + "completions/min_length": 48.0, + "completions/min_terminated_length": 48.0, + "epoch": 5.606413994169096, + "grad_norm": 0.13670922815799713, + "learning_rate": 1e-06, + "loss": 0.0104, + "num_tokens": 331985892.0, + "reward": 0.6315569281578064, + "reward_std": 0.17832709848880768, + "rewards/simpleverify_reward/mean": 0.6315569281578064, + "rewards/simpleverify_reward/std": 0.482390820980072, + "step": 545 + }, + { + "clip_ratio/high_max": 0.001750637726217974, + "clip_ratio/high_mean": 0.0007403647496175836, + "clip_ratio/low_mean": 0.000600640892116644, + "clip_ratio/low_min": 3.950626432924764e-05, + "clip_ratio/region_mean": 0.0013410056490101852, + "epoch": 5.615743440233236, + "grad_norm": 0.12390350550413132, + "learning_rate": 1e-06, + "loss": -0.0075, + "step": 546 + }, + { + "clip_ratio/high_max": 0.002036589379713405, + "clip_ratio/high_mean": 0.0007229789825942134, + "clip_ratio/low_mean": 0.0006343035311147105, + "clip_ratio/low_min": 4.37473881902406e-05, + "clip_ratio/region_mean": 0.0013572824900620617, + "epoch": 5.625072886297376, + "grad_norm": 0.1806793510913849, + "learning_rate": 1e-06, + "loss": 0.02, + "step": 547 + }, + { + "clip_ratio/high_max": 0.0018286255617567804, + "clip_ratio/high_mean": 0.0007545240860054037, + "clip_ratio/low_mean": 0.0005959441023151157, + "clip_ratio/low_min": 1.720341242616996e-05, + "clip_ratio/region_mean": 0.001350468177406583, + "epoch": 5.634402332361516, + "grad_norm": 0.1340532749891281, + "learning_rate": 1e-06, + "loss": -0.0345, + "step": 548 + }, + { + "clip_ratio/high_max": 0.0022353808672050945, + "clip_ratio/high_mean": 0.0010070279677165672, + "clip_ratio/low_mean": 0.000661815238345298, + "clip_ratio/low_min": 1.5034880561870523e-05, + "clip_ratio/region_mean": 0.0016688432078808546, + "epoch": 5.643731778425656, + "grad_norm": 0.1435471475124359, + "learning_rate": 1e-06, + "loss": -0.0288, + "step": 549 + }, + { + "clip_ratio/high_max": 0.002421589808363933, + "clip_ratio/high_mean": 0.0009676043300714809, + "clip_ratio/low_mean": 0.0006054889258848561, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001573093242768664, + "epoch": 5.653061224489796, + "grad_norm": 0.14605525135993958, + "learning_rate": 1e-06, + "loss": -0.0196, + "step": 550 + }, + { + "clip_ratio/high_max": 0.0021120113524375483, + "clip_ratio/high_mean": 0.0008440371475444408, + "clip_ratio/low_mean": 0.0006554055444212281, + "clip_ratio/low_min": 1.055564916896401e-05, + "clip_ratio/region_mean": 0.0014994427183410153, + "epoch": 5.662390670553936, + "grad_norm": 0.13467949628829956, + "learning_rate": 1e-06, + "loss": 0.008, + "step": 551 + }, + { + "clip_ratio/high_max": 0.0022380819427780807, + "clip_ratio/high_mean": 0.0009138100904237945, + "clip_ratio/low_mean": 0.0008065852262006956, + "clip_ratio/low_min": 1.268262985831825e-05, + "clip_ratio/region_mean": 0.0017203953102580272, + "epoch": 5.671720116618076, + "grad_norm": 0.13332700729370117, + "learning_rate": 1e-06, + "loss": -0.0056, + "step": 552 + }, + { + "clip_ratio/high_max": 0.0018018883165495936, + "clip_ratio/high_mean": 0.0008263292038463987, + "clip_ratio/low_mean": 0.0007220802672236459, + "clip_ratio/low_min": 7.978991197887808e-05, + "clip_ratio/region_mean": 0.0015484094692510553, + "epoch": 5.681049562682215, + "grad_norm": 0.1289679855108261, + "learning_rate": 1e-06, + "loss": -0.006, + "step": 553 + }, + { + "clip_ratio/high_max": 0.0019725370257219765, + "clip_ratio/high_mean": 0.0008009821322048083, + "clip_ratio/low_mean": 0.0008184621183318086, + "clip_ratio/low_min": 4.022441953566158e-05, + "clip_ratio/region_mean": 0.0016194442650885321, + "epoch": 5.690379008746356, + "grad_norm": 0.586776852607727, + "learning_rate": 1e-06, + "loss": 0.0323, + "step": 554 + }, + { + "clip_ratio/high_max": 0.0020085054929950275, + "clip_ratio/high_mean": 0.000941243240959011, + "clip_ratio/low_mean": 0.0007120742120605428, + "clip_ratio/low_min": 9.390024388267193e-06, + "clip_ratio/region_mean": 0.0016533174639334902, + "epoch": 5.699708454810495, + "grad_norm": 0.12782545387744904, + "learning_rate": 1e-06, + "loss": -0.0608, + "step": 555 + }, + { + "clip_ratio/high_max": 0.0023442203273589257, + "clip_ratio/high_mean": 0.0010248871185467578, + "clip_ratio/low_mean": 0.000857668945172918, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018825560255208984, + "epoch": 5.709037900874636, + "grad_norm": 0.12847337126731873, + "learning_rate": 1e-06, + "loss": -0.0208, + "step": 556 + }, + { + "clip_ratio/high_max": 0.0024622225391794927, + "clip_ratio/high_mean": 0.0010462429090694059, + "clip_ratio/low_mean": 0.0008149801797117107, + "clip_ratio/low_min": 8.858046385284979e-05, + "clip_ratio/region_mean": 0.0018612230851431377, + "epoch": 5.718367346938775, + "grad_norm": 0.12106923758983612, + "learning_rate": 1e-06, + "loss": -0.0555, + "step": 557 + }, + { + "clip_ratio/high_max": 0.002218135188741144, + "clip_ratio/high_mean": 0.000883603253896581, + "clip_ratio/low_mean": 0.000936070253374055, + "clip_ratio/low_min": 4.945598266203888e-05, + "clip_ratio/region_mean": 0.0018196734235971235, + "epoch": 5.727696793002916, + "grad_norm": 0.13230378925800323, + "learning_rate": 1e-06, + "loss": 0.0094, + "step": 558 + }, + { + "clip_ratio/high_max": 0.0022100650385254994, + "clip_ratio/high_mean": 0.0009353403002023697, + "clip_ratio/low_mean": 0.0009273219384340337, + "clip_ratio/low_min": 8.200657975976355e-05, + "clip_ratio/region_mean": 0.0018626622259034775, + "epoch": 5.737026239067055, + "grad_norm": 0.11959223449230194, + "learning_rate": 1e-06, + "loss": 0.0044, + "step": 559 + }, + { + "clip_ratio/high_max": 0.002289699543325696, + "clip_ratio/high_mean": 0.0009810059309529606, + "clip_ratio/low_mean": 0.0009907797975756694, + "clip_ratio/low_min": 3.4939223041874357e-05, + "clip_ratio/region_mean": 0.0019717857358045876, + "epoch": 5.746355685131196, + "grad_norm": 0.14054135978221893, + "learning_rate": 1e-06, + "loss": -0.0248, + "step": 560 + }, + { + "clip_ratio/high_max": 0.002019625659158919, + "clip_ratio/high_mean": 0.0008171865774784237, + "clip_ratio/low_mean": 0.0008156140029313974, + "clip_ratio/low_min": 9.728010627441108e-05, + "clip_ratio/region_mean": 0.0016328005985997152, + "epoch": 5.755685131195335, + "grad_norm": 0.13992683589458466, + "learning_rate": 1e-06, + "loss": 0.0186, + "step": 561 + }, + { + "clip_ratio/high_max": 0.002730626001721248, + "clip_ratio/high_mean": 0.001053441308613401, + "clip_ratio/low_mean": 0.0008964314220065717, + "clip_ratio/low_min": 2.4108003344736062e-05, + "clip_ratio/region_mean": 0.0019498727269819938, + "epoch": 5.765014577259475, + "grad_norm": 0.13415361940860748, + "learning_rate": 1e-06, + "loss": -0.0119, + "step": 562 + }, + { + "clip_ratio/high_max": 0.002285698297782801, + "clip_ratio/high_mean": 0.0009506792448519263, + "clip_ratio/low_mean": 0.0009029839511640603, + "clip_ratio/low_min": 5.8279083532397635e-05, + "clip_ratio/region_mean": 0.0018536631760071032, + "epoch": 5.774344023323615, + "grad_norm": 0.12575222551822662, + "learning_rate": 1e-06, + "loss": -0.0048, + "step": 563 + }, + { + "clip_ratio/high_max": 0.0024820152029860765, + "clip_ratio/high_mean": 0.0010349018048145808, + "clip_ratio/low_mean": 0.0010373322420491604, + "clip_ratio/low_min": 0.000114900558401132, + "clip_ratio/region_mean": 0.002072234026854858, + "epoch": 5.783673469387755, + "grad_norm": 0.1460321545600891, + "learning_rate": 1e-06, + "loss": 0.0061, + "step": 564 + }, + { + "clip_ratio/high_max": 0.002482082723872736, + "clip_ratio/high_mean": 0.0009173096768790856, + "clip_ratio/low_mean": 0.0009734103441587649, + "clip_ratio/low_min": 9.505935304332525e-05, + "clip_ratio/region_mean": 0.0018907200064859353, + "epoch": 5.793002915451895, + "grad_norm": 0.14576870203018188, + "learning_rate": 1e-06, + "loss": 0.0102, + "step": 565 + }, + { + "clip_ratio/high_max": 0.0028556299657793716, + "clip_ratio/high_mean": 0.0010983677311742213, + "clip_ratio/low_mean": 0.001033614684274653, + "clip_ratio/low_min": 7.411484511976596e-05, + "clip_ratio/region_mean": 0.0021319823936210014, + "epoch": 5.802332361516035, + "grad_norm": 0.14002689719200134, + "learning_rate": 1e-06, + "loss": -0.0308, + "step": 566 + }, + { + "clip_ratio/high_max": 0.0023263703478733078, + "clip_ratio/high_mean": 0.0009396486493642442, + "clip_ratio/low_mean": 0.001030469493343844, + "clip_ratio/low_min": 6.231813995327684e-05, + "clip_ratio/region_mean": 0.0019701181881828234, + "epoch": 5.811661807580175, + "grad_norm": 0.14780262112617493, + "learning_rate": 1e-06, + "loss": 0.0073, + "step": 567 + }, + { + "clip_ratio/high_max": 0.0026978353707818314, + "clip_ratio/high_mean": 0.001165849062090274, + "clip_ratio/low_mean": 0.0010416831864858977, + "clip_ratio/low_min": 1.258558222616557e-05, + "clip_ratio/region_mean": 0.0022075322485761717, + "epoch": 5.820991253644315, + "grad_norm": 0.12568442523479462, + "learning_rate": 1e-06, + "loss": -0.0078, + "step": 568 + }, + { + "clip_ratio/high_max": 0.0023234408035932574, + "clip_ratio/high_mean": 0.0009093883036257466, + "clip_ratio/low_mean": 0.001033348786222632, + "clip_ratio/low_min": 2.622194188006688e-05, + "clip_ratio/region_mean": 0.0019427370789344423, + "epoch": 5.830320699708455, + "grad_norm": 0.13785843551158905, + "learning_rate": 1e-06, + "loss": -0.0242, + "step": 569 + }, + { + "clip_ratio/high_max": 0.002600420546514215, + "clip_ratio/high_mean": 0.0010785224658320658, + "clip_ratio/low_mean": 0.0010606590512907133, + "clip_ratio/low_min": 4.420942786964588e-05, + "clip_ratio/region_mean": 0.0021391815316746943, + "epoch": 5.839650145772595, + "grad_norm": 0.1736195981502533, + "learning_rate": 1e-06, + "loss": 0.0189, + "step": 570 + }, + { + "clip_ratio/high_max": 0.002762816053291317, + "clip_ratio/high_mean": 0.0010711998183978721, + "clip_ratio/low_mean": 0.0010755806742963614, + "clip_ratio/low_min": 2.829975164786447e-05, + "clip_ratio/region_mean": 0.0021467804617714137, + "epoch": 5.848979591836734, + "grad_norm": 0.14605410397052765, + "learning_rate": 1e-06, + "loss": 0.0059, + "step": 571 + }, + { + "clip_ratio/high_max": 0.0026545310392975807, + "clip_ratio/high_mean": 0.0011149754755024333, + "clip_ratio/low_mean": 0.0010497763596504228, + "clip_ratio/low_min": 5.000800229026936e-05, + "clip_ratio/region_mean": 0.0021647518369718455, + "epoch": 5.858309037900875, + "grad_norm": 0.14060768485069275, + "learning_rate": 1e-06, + "loss": -0.0148, + "step": 572 + }, + { + "clip_ratio/high_max": 0.00251997570740059, + "clip_ratio/high_mean": 0.0010765289152914193, + "clip_ratio/low_mean": 0.001015187895973213, + "clip_ratio/low_min": 4.920676292385906e-05, + "clip_ratio/region_mean": 0.00209171681854059, + "epoch": 5.867638483965014, + "grad_norm": 0.1242566704750061, + "learning_rate": 1e-06, + "loss": -0.0047, + "step": 573 + }, + { + "clip_ratio/high_max": 0.0029883282113587484, + "clip_ratio/high_mean": 0.0011473836930235848, + "clip_ratio/low_mean": 0.0010441396098030964, + "clip_ratio/low_min": 7.687041852477705e-05, + "clip_ratio/region_mean": 0.0021915233082836494, + "epoch": 5.876967930029155, + "grad_norm": 0.13064825534820557, + "learning_rate": 1e-06, + "loss": -0.0122, + "step": 574 + }, + { + "clip_ratio/high_max": 0.0027383708220440894, + "clip_ratio/high_mean": 0.0011308666489640018, + "clip_ratio/low_mean": 0.0010081860709760804, + "clip_ratio/low_min": 4.4768569750885945e-05, + "clip_ratio/region_mean": 0.0021390527035691775, + "epoch": 5.886297376093294, + "grad_norm": 0.1567859947681427, + "learning_rate": 1e-06, + "loss": -0.0072, + "step": 575 + }, + { + "clip_ratio/high_max": 0.0025184598998748697, + "clip_ratio/high_mean": 0.001111205951019656, + "clip_ratio/low_mean": 0.0009063593856808438, + "clip_ratio/low_min": 1.1480528883112129e-05, + "clip_ratio/region_mean": 0.002017565300775459, + "epoch": 5.895626822157435, + "grad_norm": 0.11835873872041702, + "learning_rate": 1e-06, + "loss": -0.0757, + "step": 576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0405970982142857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4014.0, + "completions/mean_length": 689.2316284179688, + "completions/mean_terminated_length": 545.0742797851562, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "epoch": 6.0093294460641395, + "grad_norm": 0.13992027938365936, + "learning_rate": 1e-06, + "loss": 0.0034, + "num_tokens": 349849028.0, + "reward": 0.6221749782562256, + "reward_std": 0.16985295712947845, + "rewards/simpleverify_reward/mean": 0.6221749186515808, + "rewards/simpleverify_reward/std": 0.48485201597213745, + "step": 577 + }, + { + "clip_ratio/high_max": 0.001839089060013066, + "clip_ratio/high_mean": 0.0007203435447991069, + "clip_ratio/low_mean": 0.0005195758740228484, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001239919427462155, + "epoch": 6.01865889212828, + "grad_norm": 0.12756891548633575, + "learning_rate": 1e-06, + "loss": -0.0036, + "step": 578 + }, + { + "clip_ratio/high_max": 0.0018774046475300565, + "clip_ratio/high_mean": 0.0007390343562292401, + "clip_ratio/low_mean": 0.000552712145690748, + "clip_ratio/low_min": 2.9911461751908064e-05, + "clip_ratio/region_mean": 0.0012917465101054404, + "epoch": 6.0279883381924195, + "grad_norm": 0.14087362587451935, + "learning_rate": 1e-06, + "loss": -0.0046, + "step": 579 + }, + { + "clip_ratio/high_max": 0.0019639048987301067, + "clip_ratio/high_mean": 0.0007716389627603348, + "clip_ratio/low_mean": 0.0006355242221616209, + "clip_ratio/low_min": 1.1326567801006604e-05, + "clip_ratio/region_mean": 0.0014071632031118497, + "epoch": 6.03731778425656, + "grad_norm": 0.12709547579288483, + "learning_rate": 1e-06, + "loss": -0.0156, + "step": 580 + }, + { + "clip_ratio/high_max": 0.001984716254810337, + "clip_ratio/high_mean": 0.0008388578226004029, + "clip_ratio/low_mean": 0.0005622030585072935, + "clip_ratio/low_min": 3.081580962316366e-05, + "clip_ratio/region_mean": 0.0014010608938406222, + "epoch": 6.0466472303206995, + "grad_norm": 0.12824492156505585, + "learning_rate": 1e-06, + "loss": -0.0249, + "step": 581 + }, + { + "clip_ratio/high_max": 0.002107550681103021, + "clip_ratio/high_mean": 0.0008283532870336785, + "clip_ratio/low_mean": 0.0006303839909378439, + "clip_ratio/low_min": 5.199104634812102e-05, + "clip_ratio/region_mean": 0.0014587372861569747, + "epoch": 6.05597667638484, + "grad_norm": 0.12018285691738129, + "learning_rate": 1e-06, + "loss": 0.0127, + "step": 582 + }, + { + "clip_ratio/high_max": 0.002049487200565636, + "clip_ratio/high_mean": 0.0008545352739020018, + "clip_ratio/low_mean": 0.0006267588869377505, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00148129413719289, + "epoch": 6.0653061224489795, + "grad_norm": 0.12946173548698425, + "learning_rate": 1e-06, + "loss": 0.0087, + "step": 583 + }, + { + "clip_ratio/high_max": 0.0018140029460482765, + "clip_ratio/high_mean": 0.0008485489033773774, + "clip_ratio/low_mean": 0.000708300263795536, + "clip_ratio/low_min": 3.322311749798246e-05, + "clip_ratio/region_mean": 0.0015568491944577545, + "epoch": 6.07463556851312, + "grad_norm": 0.13875502347946167, + "learning_rate": 1e-06, + "loss": -0.011, + "step": 584 + }, + { + "clip_ratio/high_max": 0.002378743221925106, + "clip_ratio/high_mean": 0.0010164604191231774, + "clip_ratio/low_mean": 0.0007099261256371392, + "clip_ratio/low_min": 2.7026405405194964e-05, + "clip_ratio/region_mean": 0.0017263864865526557, + "epoch": 6.0839650145772595, + "grad_norm": 0.15075162053108215, + "learning_rate": 1e-06, + "loss": -0.0011, + "step": 585 + }, + { + "clip_ratio/high_max": 0.0020389452438394073, + "clip_ratio/high_mean": 0.0008910112519515678, + "clip_ratio/low_mean": 0.0007617377887072507, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001652749087952543, + "epoch": 6.093294460641399, + "grad_norm": 0.1448982208967209, + "learning_rate": 1e-06, + "loss": 0.0191, + "step": 586 + }, + { + "clip_ratio/high_max": 0.002088928649754962, + "clip_ratio/high_mean": 0.0007960854982229648, + "clip_ratio/low_mean": 0.0007277964423337835, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015238819178193808, + "epoch": 6.1026239067055394, + "grad_norm": 0.13771381974220276, + "learning_rate": 1e-06, + "loss": -0.0161, + "step": 587 + }, + { + "clip_ratio/high_max": 0.0022339819042827003, + "clip_ratio/high_mean": 0.0009189747433993034, + "clip_ratio/low_mean": 0.000862750326632522, + "clip_ratio/low_min": 4.3757292587542906e-05, + "clip_ratio/region_mean": 0.0017817250554799102, + "epoch": 6.111953352769679, + "grad_norm": 0.15242813527584076, + "learning_rate": 1e-06, + "loss": -0.0192, + "step": 588 + }, + { + "clip_ratio/high_max": 0.001995936741877813, + "clip_ratio/high_mean": 0.0008444324212177889, + "clip_ratio/low_mean": 0.000831053248475655, + "clip_ratio/low_min": 7.263965289894259e-05, + "clip_ratio/region_mean": 0.0016754856987972744, + "epoch": 6.121282798833819, + "grad_norm": 0.12993387877941132, + "learning_rate": 1e-06, + "loss": 0.003, + "step": 589 + }, + { + "clip_ratio/high_max": 0.002148512583517004, + "clip_ratio/high_mean": 0.0009271651069866493, + "clip_ratio/low_mean": 0.0007392742700176314, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016664393551764078, + "epoch": 6.130612244897959, + "grad_norm": 0.13784319162368774, + "learning_rate": 1e-06, + "loss": -0.0034, + "step": 590 + }, + { + "clip_ratio/high_max": 0.002533342529204674, + "clip_ratio/high_mean": 0.000949246366872103, + "clip_ratio/low_mean": 0.0007860487457946874, + "clip_ratio/low_min": 2.5911807824741118e-05, + "clip_ratio/region_mean": 0.0017352951253997162, + "epoch": 6.139941690962099, + "grad_norm": 0.16066806018352509, + "learning_rate": 1e-06, + "loss": -0.0404, + "step": 591 + }, + { + "clip_ratio/high_max": 0.0023953273412189446, + "clip_ratio/high_mean": 0.0009418380468559917, + "clip_ratio/low_mean": 0.000735820411136956, + "clip_ratio/low_min": 2.071594281005673e-05, + "clip_ratio/region_mean": 0.0016776584525359794, + "epoch": 6.149271137026239, + "grad_norm": 0.12303200364112854, + "learning_rate": 1e-06, + "loss": -0.0068, + "step": 592 + }, + { + "clip_ratio/high_max": 0.002475280132784974, + "clip_ratio/high_mean": 0.0009786700738914078, + "clip_ratio/low_mean": 0.0008750699344091117, + "clip_ratio/low_min": 1.9201228496967815e-05, + "clip_ratio/region_mean": 0.0018537400246714242, + "epoch": 6.158600583090379, + "grad_norm": 0.1335153728723526, + "learning_rate": 1e-06, + "loss": -0.0336, + "step": 593 + }, + { + "clip_ratio/high_max": 0.0021944416002952494, + "clip_ratio/high_mean": 0.0009211090437020175, + "clip_ratio/low_mean": 0.0008013766855583526, + "clip_ratio/low_min": 1.5232756595651153e-05, + "clip_ratio/region_mean": 0.0017224857365363277, + "epoch": 6.167930029154519, + "grad_norm": 0.1260383278131485, + "learning_rate": 1e-06, + "loss": -0.025, + "step": 594 + }, + { + "clip_ratio/high_max": 0.002393010574451182, + "clip_ratio/high_mean": 0.0008956637539085932, + "clip_ratio/low_mean": 0.0009093392845898052, + "clip_ratio/low_min": 0.00011197342428204138, + "clip_ratio/region_mean": 0.0018050030339509249, + "epoch": 6.1772594752186585, + "grad_norm": 0.13759589195251465, + "learning_rate": 1e-06, + "loss": 0.0157, + "step": 595 + }, + { + "clip_ratio/high_max": 0.00213131863711169, + "clip_ratio/high_mean": 0.0008908259896998061, + "clip_ratio/low_mean": 0.0008758629046496935, + "clip_ratio/low_min": 4.388017623568885e-05, + "clip_ratio/region_mean": 0.0017666888816165738, + "epoch": 6.186588921282799, + "grad_norm": 0.12837755680084229, + "learning_rate": 1e-06, + "loss": -0.0265, + "step": 596 + }, + { + "clip_ratio/high_max": 0.002409492401056923, + "clip_ratio/high_mean": 0.0010026691688835854, + "clip_ratio/low_mean": 0.0008720431505935267, + "clip_ratio/low_min": 1.5360039469669573e-05, + "clip_ratio/region_mean": 0.001874712310382165, + "epoch": 6.1959183673469385, + "grad_norm": 0.13673429191112518, + "learning_rate": 1e-06, + "loss": -0.0071, + "step": 597 + }, + { + "clip_ratio/high_max": 0.002369748406636063, + "clip_ratio/high_mean": 0.0010013857972808182, + "clip_ratio/low_mean": 0.000876778301972081, + "clip_ratio/low_min": 3.4616448829183355e-05, + "clip_ratio/region_mean": 0.0018781640974339098, + "epoch": 6.205247813411079, + "grad_norm": 0.1329900622367859, + "learning_rate": 1e-06, + "loss": 0.0188, + "step": 598 + }, + { + "clip_ratio/high_max": 0.0021368012894527055, + "clip_ratio/high_mean": 0.000836305545817595, + "clip_ratio/low_mean": 0.0009717360553622711, + "clip_ratio/low_min": 4.0644851651450153e-05, + "clip_ratio/region_mean": 0.0018080415975418873, + "epoch": 6.214577259475218, + "grad_norm": 0.1321057677268982, + "learning_rate": 1e-06, + "loss": 0.0373, + "step": 599 + }, + { + "clip_ratio/high_max": 0.0021515826738323085, + "clip_ratio/high_mean": 0.0008484534628223628, + "clip_ratio/low_mean": 0.0009161942289210856, + "clip_ratio/low_min": 1.8701375665841624e-05, + "clip_ratio/region_mean": 0.0017646476844674908, + "epoch": 6.223906705539359, + "grad_norm": 0.1322554051876068, + "learning_rate": 1e-06, + "loss": 0.0161, + "step": 600 + }, + { + "clip_ratio/high_max": 0.002358754485612735, + "clip_ratio/high_mean": 0.0009683257121650968, + "clip_ratio/low_mean": 0.0008753170877753291, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001843642836320214, + "epoch": 6.233236151603498, + "grad_norm": 0.11785576492547989, + "learning_rate": 1e-06, + "loss": -0.0332, + "step": 601 + }, + { + "clip_ratio/high_max": 0.0023365349807136226, + "clip_ratio/high_mean": 0.0010169639390369412, + "clip_ratio/low_mean": 0.0009689202806839603, + "clip_ratio/low_min": 5.7527481658325996e-05, + "clip_ratio/region_mean": 0.001985884242458269, + "epoch": 6.242565597667639, + "grad_norm": 0.12285005301237106, + "learning_rate": 1e-06, + "loss": 0.0047, + "step": 602 + }, + { + "clip_ratio/high_max": 0.0027111066883662716, + "clip_ratio/high_mean": 0.0010989059155690484, + "clip_ratio/low_mean": 0.0008540316393919056, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019529375931597315, + "epoch": 6.251895043731778, + "grad_norm": 0.11782839894294739, + "learning_rate": 1e-06, + "loss": -0.0259, + "step": 603 + }, + { + "clip_ratio/high_max": 0.0023481692041968927, + "clip_ratio/high_mean": 0.0010105891960847657, + "clip_ratio/low_mean": 0.0008507465663569747, + "clip_ratio/low_min": 1.814750248740893e-05, + "clip_ratio/region_mean": 0.0018613357678987086, + "epoch": 6.261224489795918, + "grad_norm": 0.12154977768659592, + "learning_rate": 1e-06, + "loss": -0.0293, + "step": 604 + }, + { + "clip_ratio/high_max": 0.0023895007907412946, + "clip_ratio/high_mean": 0.001023040087602567, + "clip_ratio/low_mean": 0.0008180166587408166, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018410567499813624, + "epoch": 6.270553935860058, + "grad_norm": 0.14218257367610931, + "learning_rate": 1e-06, + "loss": -0.0059, + "step": 605 + }, + { + "clip_ratio/high_max": 0.002362646468100138, + "clip_ratio/high_mean": 0.0010038689979410265, + "clip_ratio/low_mean": 0.000983446774625918, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001987315721635241, + "epoch": 6.279883381924198, + "grad_norm": 0.15273207426071167, + "learning_rate": 1e-06, + "loss": -0.0088, + "step": 606 + }, + { + "clip_ratio/high_max": 0.002689383734832518, + "clip_ratio/high_mean": 0.0010227087950624991, + "clip_ratio/low_mean": 0.0010481101562618278, + "clip_ratio/low_min": 6.923452019691467e-05, + "clip_ratio/region_mean": 0.0020708189040306024, + "epoch": 6.289212827988338, + "grad_norm": 0.12915882468223572, + "learning_rate": 1e-06, + "loss": -0.0116, + "step": 607 + }, + { + "clip_ratio/high_max": 0.002454601475619711, + "clip_ratio/high_mean": 0.0009301243590016384, + "clip_ratio/low_mean": 0.0009792717573873233, + "clip_ratio/low_min": 2.392802525719162e-05, + "clip_ratio/region_mean": 0.0019093961018370464, + "epoch": 6.298542274052478, + "grad_norm": 0.1344621181488037, + "learning_rate": 1e-06, + "loss": -0.0134, + "step": 608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0467006138392857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4067.0, + "completions/mean_length": 719.3214111328125, + "completions/mean_terminated_length": 553.9032592773438, + "completions/min_length": 71.0, + "completions/min_terminated_length": 71.0, + "epoch": 6.307871720116618, + "grad_norm": 0.15148359537124634, + "learning_rate": 1e-06, + "loss": 0.011, + "num_tokens": 367821938.0, + "reward": 0.625558078289032, + "reward_std": 0.17333312332630157, + "rewards/simpleverify_reward/mean": 0.6255580186843872, + "rewards/simpleverify_reward/std": 0.4839869439601898, + "step": 609 + }, + { + "clip_ratio/high_max": 0.0022209672242752276, + "clip_ratio/high_mean": 0.000887323969436693, + "clip_ratio/low_mean": 0.000513251270604087, + "clip_ratio/low_min": 2.7151231734023895e-05, + "clip_ratio/region_mean": 0.001400575289153494, + "epoch": 6.317201166180758, + "grad_norm": 0.1446138471364975, + "learning_rate": 1e-06, + "loss": -0.0197, + "step": 610 + }, + { + "clip_ratio/high_max": 0.0021032415752415545, + "clip_ratio/high_mean": 0.0008024059316085186, + "clip_ratio/low_mean": 0.0005646726176564698, + "clip_ratio/low_min": 3.498779005894903e-05, + "clip_ratio/region_mean": 0.0013670785883732606, + "epoch": 6.326530612244898, + "grad_norm": 0.1274147778749466, + "learning_rate": 1e-06, + "loss": -0.0172, + "step": 611 + }, + { + "clip_ratio/high_max": 0.0020569848093145993, + "clip_ratio/high_mean": 0.0007649173021491151, + "clip_ratio/low_mean": 0.0006643257511314005, + "clip_ratio/low_min": 1.6322799638146535e-05, + "clip_ratio/region_mean": 0.0014292430569184944, + "epoch": 6.335860058309038, + "grad_norm": 0.1524488925933838, + "learning_rate": 1e-06, + "loss": 0.0126, + "step": 612 + }, + { + "clip_ratio/high_max": 0.0023838920387788676, + "clip_ratio/high_mean": 0.0009499993720964994, + "clip_ratio/low_mean": 0.0006925849338585977, + "clip_ratio/low_min": 3.3037757930287626e-05, + "clip_ratio/region_mean": 0.0016425842914031819, + "epoch": 6.345189504373177, + "grad_norm": 0.2502550780773163, + "learning_rate": 1e-06, + "loss": -0.019, + "step": 613 + }, + { + "clip_ratio/high_max": 0.0023228178106364794, + "clip_ratio/high_mean": 0.0008319845865116804, + "clip_ratio/low_mean": 0.0006052628432371421, + "clip_ratio/low_min": 1.4269406165112741e-05, + "clip_ratio/region_mean": 0.001437247425201349, + "epoch": 6.354518950437318, + "grad_norm": 0.14092649519443512, + "learning_rate": 1e-06, + "loss": -0.0288, + "step": 614 + }, + { + "clip_ratio/high_max": 0.002091754045977723, + "clip_ratio/high_mean": 0.0008345646383531857, + "clip_ratio/low_mean": 0.0007206213213066803, + "clip_ratio/low_min": 8.818550486466847e-05, + "clip_ratio/region_mean": 0.0015551859214610886, + "epoch": 6.363848396501457, + "grad_norm": 0.12686687707901, + "learning_rate": 1e-06, + "loss": 0.0251, + "step": 615 + }, + { + "clip_ratio/high_max": 0.0023100140315364115, + "clip_ratio/high_mean": 0.0009353762652608566, + "clip_ratio/low_mean": 0.0006434729712054832, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015788492310093716, + "epoch": 6.373177842565598, + "grad_norm": 0.14159606397151947, + "learning_rate": 1e-06, + "loss": -0.0249, + "step": 616 + }, + { + "clip_ratio/high_max": 0.0021749218212789856, + "clip_ratio/high_mean": 0.0008649941773910541, + "clip_ratio/low_mean": 0.0006086839803174371, + "clip_ratio/low_min": 1.512584731244715e-05, + "clip_ratio/region_mean": 0.0014736781631654594, + "epoch": 6.382507288629737, + "grad_norm": 0.12879303097724915, + "learning_rate": 1e-06, + "loss": -0.0302, + "step": 617 + }, + { + "clip_ratio/high_max": 0.001986747003684286, + "clip_ratio/high_mean": 0.0008684917156642769, + "clip_ratio/low_mean": 0.0006974654334044317, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015659571436117403, + "epoch": 6.391836734693878, + "grad_norm": 0.13047300279140472, + "learning_rate": 1e-06, + "loss": -0.0227, + "step": 618 + }, + { + "clip_ratio/high_max": 0.002185514196753502, + "clip_ratio/high_mean": 0.0008790957544988487, + "clip_ratio/low_mean": 0.0006804655877203913, + "clip_ratio/low_min": 1.7735528672346845e-05, + "clip_ratio/region_mean": 0.0015595613258483354, + "epoch": 6.401166180758017, + "grad_norm": 0.13169008493423462, + "learning_rate": 1e-06, + "loss": -0.0229, + "step": 619 + }, + { + "clip_ratio/high_max": 0.002048205979008344, + "clip_ratio/high_mean": 0.0008806505429674871, + "clip_ratio/low_mean": 0.0006990229676375748, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015796735024196096, + "epoch": 6.410495626822158, + "grad_norm": 0.12384453415870667, + "learning_rate": 1e-06, + "loss": -0.0254, + "step": 620 + }, + { + "clip_ratio/high_max": 0.001734846653562272, + "clip_ratio/high_mean": 0.0006966765240576933, + "clip_ratio/low_mean": 0.0008338587358593941, + "clip_ratio/low_min": 4.307616109144874e-05, + "clip_ratio/region_mean": 0.0015305352389987092, + "epoch": 6.419825072886297, + "grad_norm": 0.12743602693080902, + "learning_rate": 1e-06, + "loss": 0.0384, + "step": 621 + }, + { + "clip_ratio/high_max": 0.0022159938052936923, + "clip_ratio/high_mean": 0.0009187869982270058, + "clip_ratio/low_mean": 0.0007449616150552174, + "clip_ratio/low_min": 2.9599810659419745e-05, + "clip_ratio/region_mean": 0.001663748626015149, + "epoch": 6.429154518950437, + "grad_norm": 0.1390436589717865, + "learning_rate": 1e-06, + "loss": -0.0107, + "step": 622 + }, + { + "clip_ratio/high_max": 0.0025685537002573255, + "clip_ratio/high_mean": 0.0009798562405194389, + "clip_ratio/low_mean": 0.0008664640572533244, + "clip_ratio/low_min": 1.7660355297266506e-05, + "clip_ratio/region_mean": 0.0018463203086866997, + "epoch": 6.438483965014577, + "grad_norm": 0.1312895566225052, + "learning_rate": 1e-06, + "loss": -0.0101, + "step": 623 + }, + { + "clip_ratio/high_max": 0.0024969174119178206, + "clip_ratio/high_mean": 0.0008947170717874542, + "clip_ratio/low_mean": 0.0008471670316794189, + "clip_ratio/low_min": 5.392197272158228e-05, + "clip_ratio/region_mean": 0.0017418841089238413, + "epoch": 6.447813411078717, + "grad_norm": 0.1244460865855217, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 624 + }, + { + "clip_ratio/high_max": 0.0019798041612375528, + "clip_ratio/high_mean": 0.0008563256888010073, + "clip_ratio/low_mean": 0.0007876590611886058, + "clip_ratio/low_min": 3.87471209251089e-05, + "clip_ratio/region_mean": 0.0016439847495348658, + "epoch": 6.457142857142857, + "grad_norm": 0.12919428944587708, + "learning_rate": 1e-06, + "loss": -0.0458, + "step": 625 + }, + { + "clip_ratio/high_max": 0.002259565269923769, + "clip_ratio/high_mean": 0.0010541889023443218, + "clip_ratio/low_mean": 0.0009211762953782454, + "clip_ratio/low_min": 4.037467806483619e-05, + "clip_ratio/region_mean": 0.0019753652159124613, + "epoch": 6.466472303206997, + "grad_norm": 0.13917073607444763, + "learning_rate": 1e-06, + "loss": -0.0062, + "step": 626 + }, + { + "clip_ratio/high_max": 0.0022725915259798057, + "clip_ratio/high_mean": 0.0009317070525867166, + "clip_ratio/low_mean": 0.0009989035042963224, + "clip_ratio/low_min": 1.6914749721763656e-05, + "clip_ratio/region_mean": 0.0019306105241412297, + "epoch": 6.475801749271137, + "grad_norm": 0.2032402753829956, + "learning_rate": 1e-06, + "loss": -0.0159, + "step": 627 + }, + { + "clip_ratio/high_max": 0.0022228288726182655, + "clip_ratio/high_mean": 0.0009595755964255659, + "clip_ratio/low_mean": 0.0009383168344356818, + "clip_ratio/low_min": 2.791659517242806e-05, + "clip_ratio/region_mean": 0.0018978924126713537, + "epoch": 6.485131195335277, + "grad_norm": 0.12489090114831924, + "learning_rate": 1e-06, + "loss": 0.0004, + "step": 628 + }, + { + "clip_ratio/high_max": 0.002745949721429497, + "clip_ratio/high_mean": 0.000985255235718796, + "clip_ratio/low_mean": 0.0010692366631701589, + "clip_ratio/low_min": 4.0198110582423396e-05, + "clip_ratio/region_mean": 0.0020544919098028913, + "epoch": 6.494460641399417, + "grad_norm": 0.14026491343975067, + "learning_rate": 1e-06, + "loss": -0.0038, + "step": 629 + }, + { + "clip_ratio/high_max": 0.0020863692261627875, + "clip_ratio/high_mean": 0.0008506243357260246, + "clip_ratio/low_mean": 0.0010225664809695445, + "clip_ratio/low_min": 5.534076990443282e-05, + "clip_ratio/region_mean": 0.0018731908130575903, + "epoch": 6.503790087463557, + "grad_norm": 0.13971810042858124, + "learning_rate": 1e-06, + "loss": 0.0003, + "step": 630 + }, + { + "clip_ratio/high_max": 0.002494731335900724, + "clip_ratio/high_mean": 0.0010490268068679143, + "clip_ratio/low_mean": 0.0008949859093263512, + "clip_ratio/low_min": 1.694685488473624e-05, + "clip_ratio/region_mean": 0.0019440127143752761, + "epoch": 6.513119533527696, + "grad_norm": 0.1381852924823761, + "learning_rate": 1e-06, + "loss": -0.0139, + "step": 631 + }, + { + "clip_ratio/high_max": 0.002818521599692758, + "clip_ratio/high_mean": 0.0010897832726186607, + "clip_ratio/low_mean": 0.0009193259065796155, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020091091719223186, + "epoch": 6.522448979591837, + "grad_norm": 0.13181720674037933, + "learning_rate": 1e-06, + "loss": -0.026, + "step": 632 + }, + { + "clip_ratio/high_max": 0.0020027623177156784, + "clip_ratio/high_mean": 0.0008709143876330927, + "clip_ratio/low_mean": 0.0011225317066418938, + "clip_ratio/low_min": 3.181201100233011e-05, + "clip_ratio/region_mean": 0.0019934461306547746, + "epoch": 6.531778425655976, + "grad_norm": 0.13129457831382751, + "learning_rate": 1e-06, + "loss": 0.0207, + "step": 633 + }, + { + "clip_ratio/high_max": 0.0022481358937511686, + "clip_ratio/high_mean": 0.0010323515944037354, + "clip_ratio/low_mean": 0.0008964085409388645, + "clip_ratio/low_min": 2.8242204280104488e-05, + "clip_ratio/region_mean": 0.0019287601608084515, + "epoch": 6.541107871720117, + "grad_norm": 0.14023590087890625, + "learning_rate": 1e-06, + "loss": -0.0376, + "step": 634 + }, + { + "clip_ratio/high_max": 0.0022737462750228588, + "clip_ratio/high_mean": 0.0009878130331344437, + "clip_ratio/low_mean": 0.0007715270003245678, + "clip_ratio/low_min": 3.7993919249856845e-05, + "clip_ratio/region_mean": 0.0017593400334590115, + "epoch": 6.550437317784256, + "grad_norm": 0.1318240761756897, + "learning_rate": 1e-06, + "loss": -0.0946, + "step": 635 + }, + { + "clip_ratio/high_max": 0.001854291847848799, + "clip_ratio/high_mean": 0.0008079188028204953, + "clip_ratio/low_mean": 0.001160078540124232, + "clip_ratio/low_min": 0.00015285129848052748, + "clip_ratio/region_mean": 0.0019679973629536107, + "epoch": 6.559766763848397, + "grad_norm": 0.1594737470149994, + "learning_rate": 1e-06, + "loss": 0.0375, + "step": 636 + }, + { + "clip_ratio/high_max": 0.002663663777639158, + "clip_ratio/high_mean": 0.0011117425838165218, + "clip_ratio/low_mean": 0.0009884372502710903, + "clip_ratio/low_min": 3.225418186048046e-05, + "clip_ratio/region_mean": 0.002100179895933252, + "epoch": 6.569096209912536, + "grad_norm": 0.13806471228599548, + "learning_rate": 1e-06, + "loss": -0.032, + "step": 637 + }, + { + "clip_ratio/high_max": 0.002303545923496131, + "clip_ratio/high_mean": 0.0009381746876897523, + "clip_ratio/low_mean": 0.0010181512025155826, + "clip_ratio/low_min": 1.7293858036282472e-05, + "clip_ratio/region_mean": 0.0019563259120332077, + "epoch": 6.578425655976677, + "grad_norm": 0.12728336453437805, + "learning_rate": 1e-06, + "loss": -0.006, + "step": 638 + }, + { + "clip_ratio/high_max": 0.002492465981049463, + "clip_ratio/high_mean": 0.0009622507150197634, + "clip_ratio/low_mean": 0.0009929457755788462, + "clip_ratio/low_min": 3.0569823138648644e-05, + "clip_ratio/region_mean": 0.0019551964942365885, + "epoch": 6.587755102040816, + "grad_norm": 0.12930989265441895, + "learning_rate": 1e-06, + "loss": 0.0055, + "step": 639 + }, + { + "clip_ratio/high_max": 0.002372198701777961, + "clip_ratio/high_mean": 0.0009705745032988489, + "clip_ratio/low_mean": 0.001233496735949302, + "clip_ratio/low_min": 2.739425872277934e-05, + "clip_ratio/region_mean": 0.0022040712283342145, + "epoch": 6.597084548104956, + "grad_norm": 0.12909016013145447, + "learning_rate": 1e-06, + "loss": 0.0112, + "step": 640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0470145089285714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4080.0, + "completions/mean_length": 717.0166625976562, + "completions/mean_terminated_length": 550.318115234375, + "completions/min_length": 85.0, + "completions/min_terminated_length": 85.0, + "epoch": 6.606413994169096, + "grad_norm": 0.1321345865726471, + "learning_rate": 1e-06, + "loss": -0.0434, + "num_tokens": 385762678.0, + "reward": 0.6217564344406128, + "reward_std": 0.1684490144252777, + "rewards/simpleverify_reward/mean": 0.6217564344406128, + "rewards/simpleverify_reward/std": 0.4849573075771332, + "step": 641 + }, + { + "clip_ratio/high_max": 0.0019111405417788774, + "clip_ratio/high_mean": 0.0008823664575174917, + "clip_ratio/low_mean": 0.0004736024011435802, + "clip_ratio/low_min": 5.955027154413983e-05, + "clip_ratio/region_mean": 0.0013559688377426937, + "epoch": 6.615743440233236, + "grad_norm": 0.13952584564685822, + "learning_rate": 1e-06, + "loss": -0.0624, + "step": 642 + }, + { + "clip_ratio/high_max": 0.0022251204027270433, + "clip_ratio/high_mean": 0.0008111097849905491, + "clip_ratio/low_mean": 0.0004942562100040959, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013053659749857616, + "epoch": 6.625072886297376, + "grad_norm": 0.1269238442182541, + "learning_rate": 1e-06, + "loss": -0.0111, + "step": 643 + }, + { + "clip_ratio/high_max": 0.0023263603907253128, + "clip_ratio/high_mean": 0.0007895139106040006, + "clip_ratio/low_mean": 0.0004975230986019596, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012870370010205079, + "epoch": 6.634402332361516, + "grad_norm": 0.12807856500148773, + "learning_rate": 1e-06, + "loss": -0.0165, + "step": 644 + }, + { + "clip_ratio/high_max": 0.001852031626185635, + "clip_ratio/high_mean": 0.0007514072694903007, + "clip_ratio/low_mean": 0.0006156293147796532, + "clip_ratio/low_min": 3.296637714811368e-05, + "clip_ratio/region_mean": 0.0013670365915459115, + "epoch": 6.643731778425656, + "grad_norm": 0.14370988309383392, + "learning_rate": 1e-06, + "loss": -0.0373, + "step": 645 + }, + { + "clip_ratio/high_max": 0.0021303373614500742, + "clip_ratio/high_mean": 0.0008435951331193792, + "clip_ratio/low_mean": 0.0005227381061558845, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001366333231999306, + "epoch": 6.653061224489796, + "grad_norm": 0.13653628528118134, + "learning_rate": 1e-06, + "loss": -0.0438, + "step": 646 + }, + { + "clip_ratio/high_max": 0.0018736626516329125, + "clip_ratio/high_mean": 0.0007430201894749189, + "clip_ratio/low_mean": 0.0006139592060208088, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013569793954957277, + "epoch": 6.662390670553936, + "grad_norm": 0.13890062272548676, + "learning_rate": 1e-06, + "loss": 0.0208, + "step": 647 + }, + { + "clip_ratio/high_max": 0.002104283092194237, + "clip_ratio/high_mean": 0.0007425977601087652, + "clip_ratio/low_mean": 0.000717075407919765, + "clip_ratio/low_min": 3.3325113690807484e-05, + "clip_ratio/region_mean": 0.0014596731525671203, + "epoch": 6.671720116618076, + "grad_norm": 0.134938046336174, + "learning_rate": 1e-06, + "loss": 0.0182, + "step": 648 + }, + { + "clip_ratio/high_max": 0.0020582626675604843, + "clip_ratio/high_mean": 0.0008491899734508479, + "clip_ratio/low_mean": 0.0006601896284337272, + "clip_ratio/low_min": 2.7667108952300623e-05, + "clip_ratio/region_mean": 0.0015093796027940698, + "epoch": 6.681049562682215, + "grad_norm": 0.12558665871620178, + "learning_rate": 1e-06, + "loss": -0.0322, + "step": 649 + }, + { + "clip_ratio/high_max": 0.001985960712772794, + "clip_ratio/high_mean": 0.0007914558627817314, + "clip_ratio/low_mean": 0.0006645121648034547, + "clip_ratio/low_min": 1.5531808458035812e-05, + "clip_ratio/region_mean": 0.0014559680021193344, + "epoch": 6.690379008746356, + "grad_norm": 0.13106083869934082, + "learning_rate": 1e-06, + "loss": 0.0183, + "step": 650 + }, + { + "clip_ratio/high_max": 0.0019152088279952295, + "clip_ratio/high_mean": 0.0007018474379947293, + "clip_ratio/low_mean": 0.0007393714477075264, + "clip_ratio/low_min": 1.9272278223070316e-05, + "clip_ratio/region_mean": 0.0014412189048016444, + "epoch": 6.699708454810495, + "grad_norm": 0.13795951008796692, + "learning_rate": 1e-06, + "loss": -0.0041, + "step": 651 + }, + { + "clip_ratio/high_max": 0.0021834392828168347, + "clip_ratio/high_mean": 0.0009852310631686123, + "clip_ratio/low_mean": 0.0007133240669645602, + "clip_ratio/low_min": 2.5193605324602686e-05, + "clip_ratio/region_mean": 0.0016985550755634904, + "epoch": 6.709037900874636, + "grad_norm": 0.13325002789497375, + "learning_rate": 1e-06, + "loss": -0.0543, + "step": 652 + }, + { + "clip_ratio/high_max": 0.0024413528735749424, + "clip_ratio/high_mean": 0.001027618300213362, + "clip_ratio/low_mean": 0.0008024691069294931, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001830087428970728, + "epoch": 6.718367346938775, + "grad_norm": 0.13927236199378967, + "learning_rate": 1e-06, + "loss": -0.041, + "step": 653 + }, + { + "clip_ratio/high_max": 0.0024063595228653867, + "clip_ratio/high_mean": 0.0008185788965420215, + "clip_ratio/low_mean": 0.0008711892241990427, + "clip_ratio/low_min": 6.598297386517515e-05, + "clip_ratio/region_mean": 0.001689768127107527, + "epoch": 6.727696793002916, + "grad_norm": 0.15905453264713287, + "learning_rate": 1e-06, + "loss": 0.0362, + "step": 654 + }, + { + "clip_ratio/high_max": 0.0022277879106695764, + "clip_ratio/high_mean": 0.000880508352565812, + "clip_ratio/low_mean": 0.0008590872967033647, + "clip_ratio/low_min": 5.658558802679181e-05, + "clip_ratio/region_mean": 0.0017395956456311978, + "epoch": 6.737026239067055, + "grad_norm": 0.13501450419425964, + "learning_rate": 1e-06, + "loss": -0.0065, + "step": 655 + }, + { + "clip_ratio/high_max": 0.0020497181139944587, + "clip_ratio/high_mean": 0.0008242403855547309, + "clip_ratio/low_mean": 0.0008869453085935675, + "clip_ratio/low_min": 1.7346656022709794e-05, + "clip_ratio/region_mean": 0.001711185701424256, + "epoch": 6.746355685131196, + "grad_norm": 0.12874852120876312, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 656 + }, + { + "clip_ratio/high_max": 0.002331524548935704, + "clip_ratio/high_mean": 0.0009113175801758189, + "clip_ratio/low_mean": 0.0008681975141371367, + "clip_ratio/low_min": 3.488886613922659e-05, + "clip_ratio/region_mean": 0.0017795150924939662, + "epoch": 6.755685131195335, + "grad_norm": 0.12763261795043945, + "learning_rate": 1e-06, + "loss": 0.0088, + "step": 657 + }, + { + "clip_ratio/high_max": 0.00276705261785537, + "clip_ratio/high_mean": 0.0010822703552548774, + "clip_ratio/low_mean": 0.0008276878661490628, + "clip_ratio/low_min": 1.6864543795236386e-05, + "clip_ratio/region_mean": 0.001909958227770403, + "epoch": 6.765014577259475, + "grad_norm": 0.1331901103258133, + "learning_rate": 1e-06, + "loss": -0.0457, + "step": 658 + }, + { + "clip_ratio/high_max": 0.0021022208966314793, + "clip_ratio/high_mean": 0.0008931019328883849, + "clip_ratio/low_mean": 0.0008605613475083373, + "clip_ratio/low_min": 6.899977597640827e-05, + "clip_ratio/region_mean": 0.0017536632731207646, + "epoch": 6.774344023323615, + "grad_norm": 0.11489409953355789, + "learning_rate": 1e-06, + "loss": 0.0073, + "step": 659 + }, + { + "clip_ratio/high_max": 0.002463387296302244, + "clip_ratio/high_mean": 0.0009977245645131916, + "clip_ratio/low_mean": 0.0007485841033485485, + "clip_ratio/low_min": 2.4356975700356998e-05, + "clip_ratio/region_mean": 0.001746308640576899, + "epoch": 6.783673469387755, + "grad_norm": 0.11657286435365677, + "learning_rate": 1e-06, + "loss": -0.0459, + "step": 660 + }, + { + "clip_ratio/high_max": 0.0022582504607271403, + "clip_ratio/high_mean": 0.0009322661062469706, + "clip_ratio/low_mean": 0.0007675797896808945, + "clip_ratio/low_min": 4.44878378402791e-05, + "clip_ratio/region_mean": 0.0016998459104797803, + "epoch": 6.793002915451895, + "grad_norm": 0.1246897354722023, + "learning_rate": 1e-06, + "loss": -0.0173, + "step": 661 + }, + { + "clip_ratio/high_max": 0.00245572610583622, + "clip_ratio/high_mean": 0.0009250996590708382, + "clip_ratio/low_mean": 0.0010003117113228654, + "clip_ratio/low_min": 0.00011339371030771872, + "clip_ratio/region_mean": 0.0019254113140050322, + "epoch": 6.802332361516035, + "grad_norm": 0.13310350477695465, + "learning_rate": 1e-06, + "loss": 0.0137, + "step": 662 + }, + { + "clip_ratio/high_max": 0.0021200337941991165, + "clip_ratio/high_mean": 0.0009233693654095987, + "clip_ratio/low_mean": 0.0008585165523982141, + "clip_ratio/low_min": 6.877341365907341e-05, + "clip_ratio/region_mean": 0.0017818859341787174, + "epoch": 6.811661807580175, + "grad_norm": 0.13047559559345245, + "learning_rate": 1e-06, + "loss": 0.0144, + "step": 663 + }, + { + "clip_ratio/high_max": 0.001993808531551622, + "clip_ratio/high_mean": 0.0008405548724113032, + "clip_ratio/low_mean": 0.0008321946061187191, + "clip_ratio/low_min": 0.0001215322354255477, + "clip_ratio/region_mean": 0.0016727494803490117, + "epoch": 6.820991253644315, + "grad_norm": 0.11407936364412308, + "learning_rate": 1e-06, + "loss": -0.0322, + "step": 664 + }, + { + "clip_ratio/high_max": 0.0020853564492426813, + "clip_ratio/high_mean": 0.0009454951541556511, + "clip_ratio/low_mean": 0.0008293647879327182, + "clip_ratio/low_min": 6.899723666720092e-05, + "clip_ratio/region_mean": 0.001774859949364327, + "epoch": 6.830320699708455, + "grad_norm": 0.11815281957387924, + "learning_rate": 1e-06, + "loss": -0.0151, + "step": 665 + }, + { + "clip_ratio/high_max": 0.0025716686286614276, + "clip_ratio/high_mean": 0.001044850871039671, + "clip_ratio/low_mean": 0.0007745600814814679, + "clip_ratio/low_min": 3.21006664307788e-05, + "clip_ratio/region_mean": 0.0018194109507021494, + "epoch": 6.839650145772595, + "grad_norm": 0.13043001294136047, + "learning_rate": 1e-06, + "loss": -0.0444, + "step": 666 + }, + { + "clip_ratio/high_max": 0.002634268610563595, + "clip_ratio/high_mean": 0.0010826465440914035, + "clip_ratio/low_mean": 0.0009278506931877928, + "clip_ratio/low_min": 1.8927921701106243e-05, + "clip_ratio/region_mean": 0.00201049728639191, + "epoch": 6.848979591836734, + "grad_norm": 0.14265958964824677, + "learning_rate": 1e-06, + "loss": 0.0161, + "step": 667 + }, + { + "clip_ratio/high_max": 0.0021339081758924294, + "clip_ratio/high_mean": 0.0008609269116277574, + "clip_ratio/low_mean": 0.0009526016474410426, + "clip_ratio/low_min": 6.552633749379311e-05, + "clip_ratio/region_mean": 0.0018135285026801284, + "epoch": 6.858309037900875, + "grad_norm": 0.12092170119285583, + "learning_rate": 1e-06, + "loss": -0.0026, + "step": 668 + }, + { + "clip_ratio/high_max": 0.0026094949716934934, + "clip_ratio/high_mean": 0.0009530665192869492, + "clip_ratio/low_mean": 0.0009451533987885341, + "clip_ratio/low_min": 3.4131369829992764e-05, + "clip_ratio/region_mean": 0.0018982198744197376, + "epoch": 6.867638483965014, + "grad_norm": 0.1307818740606308, + "learning_rate": 1e-06, + "loss": 0.0059, + "step": 669 + }, + { + "clip_ratio/high_max": 0.002434329413517844, + "clip_ratio/high_mean": 0.0010277092878823169, + "clip_ratio/low_mean": 0.0010958412749459967, + "clip_ratio/low_min": 7.635772453795653e-05, + "clip_ratio/region_mean": 0.002123550591932144, + "epoch": 6.876967930029155, + "grad_norm": 0.14588437974452972, + "learning_rate": 1e-06, + "loss": 0.002, + "step": 670 + }, + { + "clip_ratio/high_max": 0.002530924561142456, + "clip_ratio/high_mean": 0.0010226413687632885, + "clip_ratio/low_mean": 0.0008716365955478977, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018942779424833134, + "epoch": 6.886297376093294, + "grad_norm": 0.1325405389070511, + "learning_rate": 1e-06, + "loss": -0.0008, + "step": 671 + }, + { + "clip_ratio/high_max": 0.0027137969445902854, + "clip_ratio/high_mean": 0.0011204696165805217, + "clip_ratio/low_mean": 0.0009062435274245217, + "clip_ratio/low_min": 0.00011564626584004145, + "clip_ratio/region_mean": 0.0020267131621949375, + "epoch": 6.895626822157435, + "grad_norm": 0.12995803356170654, + "learning_rate": 1e-06, + "loss": -0.0388, + "step": 672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0511648995535714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4094.0, + "completions/mean_length": 733.5430297851562, + "completions/mean_terminated_length": 552.22607421875, + "completions/min_length": 73.0, + "completions/min_terminated_length": 73.0, + "epoch": 7.0093294460641395, + "grad_norm": 0.1446388065814972, + "learning_rate": 1e-06, + "loss": -0.0328, + "num_tokens": 403643101.0, + "reward": 0.631661593914032, + "reward_std": 0.17118562757968903, + "rewards/simpleverify_reward/mean": 0.6316615343093872, + "rewards/simpleverify_reward/std": 0.482362300157547, + "step": 673 + }, + { + "clip_ratio/high_max": 0.002274634229252115, + "clip_ratio/high_mean": 0.0008250863938883413, + "clip_ratio/low_mean": 0.0005860922447027406, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014111786767898593, + "epoch": 7.01865889212828, + "grad_norm": 0.14925158023834229, + "learning_rate": 1e-06, + "loss": 0.0071, + "step": 674 + }, + { + "clip_ratio/high_max": 0.002243863658804912, + "clip_ratio/high_mean": 0.000805944446256035, + "clip_ratio/low_mean": 0.0005229045946180122, + "clip_ratio/low_min": 1.3736264008912258e-05, + "clip_ratio/region_mean": 0.001328849026322132, + "epoch": 7.0279883381924195, + "grad_norm": 0.1439531147480011, + "learning_rate": 1e-06, + "loss": 0.0228, + "step": 675 + }, + { + "clip_ratio/high_max": 0.002420619966869708, + "clip_ratio/high_mean": 0.0008337804829352535, + "clip_ratio/low_mean": 0.0005500074266819865, + "clip_ratio/low_min": 1.165392495749984e-05, + "clip_ratio/region_mean": 0.0013837879268976394, + "epoch": 7.03731778425656, + "grad_norm": 0.14207249879837036, + "learning_rate": 1e-06, + "loss": -0.0277, + "step": 676 + }, + { + "clip_ratio/high_max": 0.002262996382341953, + "clip_ratio/high_mean": 0.000948994094869704, + "clip_ratio/low_mean": 0.0005671589369740104, + "clip_ratio/low_min": 4.60694382127258e-05, + "clip_ratio/region_mean": 0.0015161530172917992, + "epoch": 7.0466472303206995, + "grad_norm": 0.1413637399673462, + "learning_rate": 1e-06, + "loss": -0.0405, + "step": 677 + }, + { + "clip_ratio/high_max": 0.0017554597980051767, + "clip_ratio/high_mean": 0.0007474723761333735, + "clip_ratio/low_mean": 0.0006531819535666727, + "clip_ratio/low_min": 1.340626386081567e-05, + "clip_ratio/region_mean": 0.001400654324243078, + "epoch": 7.05597667638484, + "grad_norm": 0.14316169917583466, + "learning_rate": 1e-06, + "loss": 0.0451, + "step": 678 + }, + { + "clip_ratio/high_max": 0.002214854051999282, + "clip_ratio/high_mean": 0.0009222275821230141, + "clip_ratio/low_mean": 0.0006624479378842807, + "clip_ratio/low_min": 3.212185765733011e-05, + "clip_ratio/region_mean": 0.0015846755341044627, + "epoch": 7.0653061224489795, + "grad_norm": 0.13190414011478424, + "learning_rate": 1e-06, + "loss": -0.0666, + "step": 679 + }, + { + "clip_ratio/high_max": 0.002458992054016562, + "clip_ratio/high_mean": 0.0009588229640939971, + "clip_ratio/low_mean": 0.0006006334433550364, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015594563810736872, + "epoch": 7.07463556851312, + "grad_norm": 0.13769963383674622, + "learning_rate": 1e-06, + "loss": -0.0435, + "step": 680 + }, + { + "clip_ratio/high_max": 0.0019705963059095666, + "clip_ratio/high_mean": 0.0008263262461696286, + "clip_ratio/low_mean": 0.0006524786058434984, + "clip_ratio/low_min": 3.8958807635935955e-05, + "clip_ratio/region_mean": 0.0014788048538321164, + "epoch": 7.0839650145772595, + "grad_norm": 0.12982453405857086, + "learning_rate": 1e-06, + "loss": -0.024, + "step": 681 + }, + { + "clip_ratio/high_max": 0.002256254720123252, + "clip_ratio/high_mean": 0.0008849734849718516, + "clip_ratio/low_mean": 0.000692534000336309, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015775075044075493, + "epoch": 7.093294460641399, + "grad_norm": 0.1436099112033844, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 682 + }, + { + "clip_ratio/high_max": 0.0022941908391658217, + "clip_ratio/high_mean": 0.0009941111602529418, + "clip_ratio/low_mean": 0.0006840911964900442, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016782023449195549, + "epoch": 7.1026239067055394, + "grad_norm": 0.13922156393527985, + "learning_rate": 1e-06, + "loss": -0.0516, + "step": 683 + }, + { + "clip_ratio/high_max": 0.002354970754822716, + "clip_ratio/high_mean": 0.0008975140444817953, + "clip_ratio/low_mean": 0.0006907357383170165, + "clip_ratio/low_min": 4.802758303412702e-05, + "clip_ratio/region_mean": 0.0015882497318671085, + "epoch": 7.111953352769679, + "grad_norm": 0.13131393492221832, + "learning_rate": 1e-06, + "loss": 0.0179, + "step": 684 + }, + { + "clip_ratio/high_max": 0.002078121993690729, + "clip_ratio/high_mean": 0.0008868593686202075, + "clip_ratio/low_mean": 0.0008656317368149757, + "clip_ratio/low_min": 3.122991256532259e-05, + "clip_ratio/region_mean": 0.0017524911236250773, + "epoch": 7.121282798833819, + "grad_norm": 0.15135273337364197, + "learning_rate": 1e-06, + "loss": 0.0063, + "step": 685 + }, + { + "clip_ratio/high_max": 0.0020751736919919495, + "clip_ratio/high_mean": 0.0007557308708783239, + "clip_ratio/low_mean": 0.000793259750935249, + "clip_ratio/low_min": 1.581477772560902e-05, + "clip_ratio/region_mean": 0.0015489905963477213, + "epoch": 7.130612244897959, + "grad_norm": 0.1345197707414627, + "learning_rate": 1e-06, + "loss": -0.0156, + "step": 686 + }, + { + "clip_ratio/high_max": 0.002680592573597096, + "clip_ratio/high_mean": 0.0010690451599657536, + "clip_ratio/low_mean": 0.0007496202579204692, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018186654197052121, + "epoch": 7.139941690962099, + "grad_norm": 0.13936971127986908, + "learning_rate": 1e-06, + "loss": -0.0451, + "step": 687 + }, + { + "clip_ratio/high_max": 0.002169146573578473, + "clip_ratio/high_mean": 0.000876297472132137, + "clip_ratio/low_mean": 0.0009492349327047123, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001825532381189987, + "epoch": 7.149271137026239, + "grad_norm": 0.1437898576259613, + "learning_rate": 1e-06, + "loss": 0.0306, + "step": 688 + }, + { + "clip_ratio/high_max": 0.0022963358896959107, + "clip_ratio/high_mean": 0.0009715426658658544, + "clip_ratio/low_mean": 0.0008095532939478289, + "clip_ratio/low_min": 3.749865209101699e-05, + "clip_ratio/region_mean": 0.001781095976184588, + "epoch": 7.158600583090379, + "grad_norm": 0.1644812375307083, + "learning_rate": 1e-06, + "loss": -0.0328, + "step": 689 + }, + { + "clip_ratio/high_max": 0.0020644167161663063, + "clip_ratio/high_mean": 0.0009001021589938318, + "clip_ratio/low_mean": 0.001011189477139851, + "clip_ratio/low_min": 1.4920028661435936e-05, + "clip_ratio/region_mean": 0.0019112915979349054, + "epoch": 7.167930029154519, + "grad_norm": 0.15941566228866577, + "learning_rate": 1e-06, + "loss": -0.0075, + "step": 690 + }, + { + "clip_ratio/high_max": 0.0023595267775817774, + "clip_ratio/high_mean": 0.0009081766766030341, + "clip_ratio/low_mean": 0.0007945056386233773, + "clip_ratio/low_min": 3.2765401556389406e-05, + "clip_ratio/region_mean": 0.001702682362520136, + "epoch": 7.1772594752186585, + "grad_norm": 0.13080213963985443, + "learning_rate": 1e-06, + "loss": 0.0068, + "step": 691 + }, + { + "clip_ratio/high_max": 0.0022986296971794218, + "clip_ratio/high_mean": 0.0010158856930502225, + "clip_ratio/low_mean": 0.0010339694599679206, + "clip_ratio/low_min": 6.743405174347572e-05, + "clip_ratio/region_mean": 0.00204985513119027, + "epoch": 7.186588921282799, + "grad_norm": 0.1372389942407608, + "learning_rate": 1e-06, + "loss": 0.0044, + "step": 692 + }, + { + "clip_ratio/high_max": 0.0023204920944408514, + "clip_ratio/high_mean": 0.0008961426792666316, + "clip_ratio/low_mean": 0.0010075816626340384, + "clip_ratio/low_min": 1.6525647879461758e-05, + "clip_ratio/region_mean": 0.0019037243546335958, + "epoch": 7.1959183673469385, + "grad_norm": 0.1330796182155609, + "learning_rate": 1e-06, + "loss": 0.022, + "step": 693 + }, + { + "clip_ratio/high_max": 0.0028003066036035307, + "clip_ratio/high_mean": 0.000988101564871613, + "clip_ratio/low_mean": 0.0009583205028320663, + "clip_ratio/low_min": 1.648641591600608e-05, + "clip_ratio/region_mean": 0.001946422089531552, + "epoch": 7.205247813411079, + "grad_norm": 0.1313985288143158, + "learning_rate": 1e-06, + "loss": -0.0274, + "step": 694 + }, + { + "clip_ratio/high_max": 0.002214182008174248, + "clip_ratio/high_mean": 0.0010327479685656726, + "clip_ratio/low_mean": 0.0008715986059542047, + "clip_ratio/low_min": 2.9267150239320472e-05, + "clip_ratio/region_mean": 0.0019043465363210998, + "epoch": 7.214577259475218, + "grad_norm": 0.13056787848472595, + "learning_rate": 1e-06, + "loss": -0.0765, + "step": 695 + }, + { + "clip_ratio/high_max": 0.0021691152069251984, + "clip_ratio/high_mean": 0.0009348489729745779, + "clip_ratio/low_mean": 0.000981265957307187, + "clip_ratio/low_min": 1.5683814126532525e-05, + "clip_ratio/region_mean": 0.0019161148666171357, + "epoch": 7.223906705539359, + "grad_norm": 0.12841342389583588, + "learning_rate": 1e-06, + "loss": -0.0112, + "step": 696 + }, + { + "clip_ratio/high_max": 0.0021718939606216736, + "clip_ratio/high_mean": 0.0009283142389904242, + "clip_ratio/low_mean": 0.0007290018111234531, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001657316075579729, + "epoch": 7.233236151603498, + "grad_norm": 0.13874033093452454, + "learning_rate": 1e-06, + "loss": -0.0493, + "step": 697 + }, + { + "clip_ratio/high_max": 0.002405275768978754, + "clip_ratio/high_mean": 0.0009633324098103913, + "clip_ratio/low_mean": 0.0007987799581314903, + "clip_ratio/low_min": 5.313033307174919e-05, + "clip_ratio/region_mean": 0.0017621123333810829, + "epoch": 7.242565597667639, + "grad_norm": 0.12721934914588928, + "learning_rate": 1e-06, + "loss": -0.047, + "step": 698 + }, + { + "clip_ratio/high_max": 0.0022063128199079074, + "clip_ratio/high_mean": 0.0008847082754073199, + "clip_ratio/low_mean": 0.0009335355862276629, + "clip_ratio/low_min": 1.8712575183599256e-05, + "clip_ratio/region_mean": 0.001818243857997004, + "epoch": 7.251895043731778, + "grad_norm": 0.1464638113975525, + "learning_rate": 1e-06, + "loss": -0.0474, + "step": 699 + }, + { + "clip_ratio/high_max": 0.002669274304935243, + "clip_ratio/high_mean": 0.0010141936545551289, + "clip_ratio/low_mean": 0.0008192513323592721, + "clip_ratio/low_min": 3.173394361510873e-05, + "clip_ratio/region_mean": 0.0018334450287511572, + "epoch": 7.261224489795918, + "grad_norm": 0.12942858040332794, + "learning_rate": 1e-06, + "loss": -0.0304, + "step": 700 + }, + { + "clip_ratio/high_max": 0.0025161706580547616, + "clip_ratio/high_mean": 0.0010398143494967371, + "clip_ratio/low_mean": 0.0010020996051025577, + "clip_ratio/low_min": 4.560462821245892e-05, + "clip_ratio/region_mean": 0.0020419140200829133, + "epoch": 7.270553935860058, + "grad_norm": 0.11450876295566559, + "learning_rate": 1e-06, + "loss": -0.0262, + "step": 701 + }, + { + "clip_ratio/high_max": 0.0022585059705306776, + "clip_ratio/high_mean": 0.0009332661811640719, + "clip_ratio/low_mean": 0.0011183156238985248, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020515818177955225, + "epoch": 7.279883381924198, + "grad_norm": 0.13772554695606232, + "learning_rate": 1e-06, + "loss": 0.0031, + "step": 702 + }, + { + "clip_ratio/high_max": 0.0026186193281318992, + "clip_ratio/high_mean": 0.0010648052593751345, + "clip_ratio/low_mean": 0.001090158299120958, + "clip_ratio/low_min": 8.075695222942159e-05, + "clip_ratio/region_mean": 0.002154963578504976, + "epoch": 7.289212827988338, + "grad_norm": 0.18790072202682495, + "learning_rate": 1e-06, + "loss": 0.0152, + "step": 703 + }, + { + "clip_ratio/high_max": 0.002458573682815768, + "clip_ratio/high_mean": 0.0010990938244503923, + "clip_ratio/low_mean": 0.0008878713979356689, + "clip_ratio/low_min": 2.874884921766352e-05, + "clip_ratio/region_mean": 0.0019869652242050506, + "epoch": 7.298542274052478, + "grad_norm": 0.13063327968120575, + "learning_rate": 1e-06, + "loss": -0.0327, + "step": 704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0545479910714286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4090.0, + "completions/mean_length": 749.7044067382812, + "completions/mean_terminated_length": 556.639404296875, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 7.307871720116618, + "grad_norm": 0.1654537320137024, + "learning_rate": 1e-06, + "loss": -0.0528, + "num_tokens": 421575001.0, + "reward": 0.6293247938156128, + "reward_std": 0.16807661950588226, + "rewards/simpleverify_reward/mean": 0.6293247938156128, + "rewards/simpleverify_reward/std": 0.48299407958984375, + "step": 705 + }, + { + "clip_ratio/high_max": 0.0018265460385009646, + "clip_ratio/high_mean": 0.0007896211682236753, + "clip_ratio/low_mean": 0.0004435829960129922, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012332041915215086, + "epoch": 7.317201166180758, + "grad_norm": 0.13172782957553864, + "learning_rate": 1e-06, + "loss": -0.0518, + "step": 706 + }, + { + "clip_ratio/high_max": 0.0019675174025906017, + "clip_ratio/high_mean": 0.000811134652394685, + "clip_ratio/low_mean": 0.0005115683743497357, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001322703055848251, + "epoch": 7.326530612244898, + "grad_norm": 0.1299964338541031, + "learning_rate": 1e-06, + "loss": -0.0396, + "step": 707 + }, + { + "clip_ratio/high_max": 0.0019363403334864415, + "clip_ratio/high_mean": 0.0007731013447482837, + "clip_ratio/low_mean": 0.0005323018531271373, + "clip_ratio/low_min": 2.1118432414368726e-05, + "clip_ratio/region_mean": 0.001305403231526725, + "epoch": 7.335860058309038, + "grad_norm": 0.15011441707611084, + "learning_rate": 1e-06, + "loss": -0.0005, + "step": 708 + }, + { + "clip_ratio/high_max": 0.0020807597538805567, + "clip_ratio/high_mean": 0.0007859736288082786, + "clip_ratio/low_mean": 0.0004989729723092751, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001284946600208059, + "epoch": 7.345189504373177, + "grad_norm": 0.12670396268367767, + "learning_rate": 1e-06, + "loss": -0.0189, + "step": 709 + }, + { + "clip_ratio/high_max": 0.00171028740805923, + "clip_ratio/high_mean": 0.0006777316666557454, + "clip_ratio/low_mean": 0.0005668956837325823, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012446273758541793, + "epoch": 7.354518950437318, + "grad_norm": 0.12118559330701828, + "learning_rate": 1e-06, + "loss": -0.0101, + "step": 710 + }, + { + "clip_ratio/high_max": 0.0021373688505264, + "clip_ratio/high_mean": 0.0007869376004236983, + "clip_ratio/low_mean": 0.0005991235275359941, + "clip_ratio/low_min": 5.3041017963550985e-05, + "clip_ratio/region_mean": 0.001386061132507166, + "epoch": 7.363848396501457, + "grad_norm": 0.15470343828201294, + "learning_rate": 1e-06, + "loss": -0.0066, + "step": 711 + }, + { + "clip_ratio/high_max": 0.0020572402245306876, + "clip_ratio/high_mean": 0.0008137016102409689, + "clip_ratio/low_mean": 0.0005353947508410783, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013490963719959836, + "epoch": 7.373177842565598, + "grad_norm": 0.12657277286052704, + "learning_rate": 1e-06, + "loss": -0.0631, + "step": 712 + }, + { + "clip_ratio/high_max": 0.002108126394887222, + "clip_ratio/high_mean": 0.0008721666526980698, + "clip_ratio/low_mean": 0.0005689751374120533, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014411418087547645, + "epoch": 7.382507288629737, + "grad_norm": 0.1506998986005783, + "learning_rate": 1e-06, + "loss": -0.0117, + "step": 713 + }, + { + "clip_ratio/high_max": 0.0021407490348792635, + "clip_ratio/high_mean": 0.0007874676266510505, + "clip_ratio/low_mean": 0.0006852654159956728, + "clip_ratio/low_min": 3.16756540996721e-05, + "clip_ratio/region_mean": 0.0014727330490131862, + "epoch": 7.391836734693878, + "grad_norm": 0.14170649647712708, + "learning_rate": 1e-06, + "loss": 0.0095, + "step": 714 + }, + { + "clip_ratio/high_max": 0.001763935240887804, + "clip_ratio/high_mean": 0.0008219140472647268, + "clip_ratio/low_mean": 0.0006489899096777663, + "clip_ratio/low_min": 2.61451586993644e-05, + "clip_ratio/region_mean": 0.001470903956942493, + "epoch": 7.401166180758017, + "grad_norm": 0.13472793996334076, + "learning_rate": 1e-06, + "loss": 0.0177, + "step": 715 + }, + { + "clip_ratio/high_max": 0.0021679992059944198, + "clip_ratio/high_mean": 0.0008356792923223111, + "clip_ratio/low_mean": 0.0007308494523385889, + "clip_ratio/low_min": 3.1248534469341394e-05, + "clip_ratio/region_mean": 0.0015665287464798894, + "epoch": 7.410495626822158, + "grad_norm": 0.14047972857952118, + "learning_rate": 1e-06, + "loss": -0.012, + "step": 716 + }, + { + "clip_ratio/high_max": 0.0022411099416785873, + "clip_ratio/high_mean": 0.0007892521362009575, + "clip_ratio/low_mean": 0.0007932402222650126, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015824923502805177, + "epoch": 7.419825072886297, + "grad_norm": 0.13195101916790009, + "learning_rate": 1e-06, + "loss": 0.0245, + "step": 717 + }, + { + "clip_ratio/high_max": 0.002300900043337606, + "clip_ratio/high_mean": 0.0009294850769947516, + "clip_ratio/low_mean": 0.0006163782636576798, + "clip_ratio/low_min": 1.4884496522427071e-05, + "clip_ratio/region_mean": 0.0015458633279195055, + "epoch": 7.429154518950437, + "grad_norm": 0.12871462106704712, + "learning_rate": 1e-06, + "loss": -0.0491, + "step": 718 + }, + { + "clip_ratio/high_max": 0.002381900525506353, + "clip_ratio/high_mean": 0.0010049530519609107, + "clip_ratio/low_mean": 0.0007968530899233883, + "clip_ratio/low_min": 1.3516436411009636e-05, + "clip_ratio/region_mean": 0.0018018061346083414, + "epoch": 7.438483965014577, + "grad_norm": 0.13149583339691162, + "learning_rate": 1e-06, + "loss": -0.0338, + "step": 719 + }, + { + "clip_ratio/high_max": 0.0022384045259968843, + "clip_ratio/high_mean": 0.0009122575393121224, + "clip_ratio/low_mean": 0.0008931772263167659, + "clip_ratio/low_min": 4.29996543971356e-05, + "clip_ratio/region_mean": 0.0018054347456200048, + "epoch": 7.447813411078717, + "grad_norm": 0.1355581134557724, + "learning_rate": 1e-06, + "loss": -0.0048, + "step": 720 + }, + { + "clip_ratio/high_max": 0.0022939459486224223, + "clip_ratio/high_mean": 0.0008962302754298435, + "clip_ratio/low_mean": 0.0007868133252486587, + "clip_ratio/low_min": 2.021999353019055e-05, + "clip_ratio/region_mean": 0.0016830435997690074, + "epoch": 7.457142857142857, + "grad_norm": 0.12592262029647827, + "learning_rate": 1e-06, + "loss": -0.0571, + "step": 721 + }, + { + "clip_ratio/high_max": 0.0022696035593980923, + "clip_ratio/high_mean": 0.0008649930314277299, + "clip_ratio/low_mean": 0.00077668963058386, + "clip_ratio/low_min": 3.1017370929475874e-05, + "clip_ratio/region_mean": 0.0016416826292697806, + "epoch": 7.466472303206997, + "grad_norm": 0.13758066296577454, + "learning_rate": 1e-06, + "loss": -0.0146, + "step": 722 + }, + { + "clip_ratio/high_max": 0.002263756272441242, + "clip_ratio/high_mean": 0.0008887717467587208, + "clip_ratio/low_mean": 0.0009162615606328472, + "clip_ratio/low_min": 3.6778646062884945e-05, + "clip_ratio/region_mean": 0.001805033316486515, + "epoch": 7.475801749271137, + "grad_norm": 0.1429128646850586, + "learning_rate": 1e-06, + "loss": -0.0103, + "step": 723 + }, + { + "clip_ratio/high_max": 0.002174385757825803, + "clip_ratio/high_mean": 0.0008165840026777005, + "clip_ratio/low_mean": 0.001072266481060069, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018888504710048437, + "epoch": 7.485131195335277, + "grad_norm": 0.1299685388803482, + "learning_rate": 1e-06, + "loss": -0.0031, + "step": 724 + }, + { + "clip_ratio/high_max": 0.0025791304724407382, + "clip_ratio/high_mean": 0.0009592306014383212, + "clip_ratio/low_mean": 0.0008740468474570662, + "clip_ratio/low_min": 9.472469355387148e-05, + "clip_ratio/region_mean": 0.0018332774852751754, + "epoch": 7.494460641399417, + "grad_norm": 0.13293859362602234, + "learning_rate": 1e-06, + "loss": -0.0253, + "step": 725 + }, + { + "clip_ratio/high_max": 0.0021132149777258746, + "clip_ratio/high_mean": 0.0008707468296051957, + "clip_ratio/low_mean": 0.0008908439904189436, + "clip_ratio/low_min": 5.6357079301960766e-05, + "clip_ratio/region_mean": 0.0017615908582229167, + "epoch": 7.503790087463557, + "grad_norm": 0.16601741313934326, + "learning_rate": 1e-06, + "loss": -0.0005, + "step": 726 + }, + { + "clip_ratio/high_max": 0.00237304413531092, + "clip_ratio/high_mean": 0.0009437441749469144, + "clip_ratio/low_mean": 0.0008811013576632831, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018248454907734413, + "epoch": 7.513119533527696, + "grad_norm": 0.12976625561714172, + "learning_rate": 1e-06, + "loss": 0.0076, + "step": 727 + }, + { + "clip_ratio/high_max": 0.0020073395935469307, + "clip_ratio/high_mean": 0.000853819359690533, + "clip_ratio/low_mean": 0.0010332491765439045, + "clip_ratio/low_min": 0.00010556827328400686, + "clip_ratio/region_mean": 0.0018870685453293845, + "epoch": 7.522448979591837, + "grad_norm": 0.1306147277355194, + "learning_rate": 1e-06, + "loss": 0.0245, + "step": 728 + }, + { + "clip_ratio/high_max": 0.0022646796387562063, + "clip_ratio/high_mean": 0.0009346798888145713, + "clip_ratio/low_mean": 0.0009723316361487377, + "clip_ratio/low_min": 3.4032127587124705e-05, + "clip_ratio/region_mean": 0.001907011515868362, + "epoch": 7.531778425655976, + "grad_norm": 0.1261693239212036, + "learning_rate": 1e-06, + "loss": 0.0064, + "step": 729 + }, + { + "clip_ratio/high_max": 0.0021227979741524905, + "clip_ratio/high_mean": 0.0009229148636222817, + "clip_ratio/low_mean": 0.0009381707404827466, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001861085562268272, + "epoch": 7.541107871720117, + "grad_norm": 0.1381930261850357, + "learning_rate": 1e-06, + "loss": -0.0369, + "step": 730 + }, + { + "clip_ratio/high_max": 0.002701890691241715, + "clip_ratio/high_mean": 0.0010717016557464376, + "clip_ratio/low_mean": 0.0009449970402783947, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020166987233096734, + "epoch": 7.550437317784256, + "grad_norm": 0.12776899337768555, + "learning_rate": 1e-06, + "loss": -0.009, + "step": 731 + }, + { + "clip_ratio/high_max": 0.0025922139902831987, + "clip_ratio/high_mean": 0.0010527947997616138, + "clip_ratio/low_mean": 0.0009150450914603425, + "clip_ratio/low_min": 7.38550370442681e-05, + "clip_ratio/region_mean": 0.0019678398821270093, + "epoch": 7.559766763848397, + "grad_norm": 0.12704047560691833, + "learning_rate": 1e-06, + "loss": -0.0294, + "step": 732 + }, + { + "clip_ratio/high_max": 0.0025765405007405207, + "clip_ratio/high_mean": 0.0010714326563174836, + "clip_ratio/low_mean": 0.000773973153627594, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018454057790222578, + "epoch": 7.569096209912536, + "grad_norm": 0.12981204688549042, + "learning_rate": 1e-06, + "loss": -0.0459, + "step": 733 + }, + { + "clip_ratio/high_max": 0.0025848354889603797, + "clip_ratio/high_mean": 0.0010267427805956686, + "clip_ratio/low_mean": 0.0009916157578118145, + "clip_ratio/low_min": 7.33508177290787e-05, + "clip_ratio/region_mean": 0.002018358551140409, + "epoch": 7.578425655976677, + "grad_norm": 0.12835033237934113, + "learning_rate": 1e-06, + "loss": -0.0476, + "step": 734 + }, + { + "clip_ratio/high_max": 0.0029205042446847074, + "clip_ratio/high_mean": 0.0010689858536352403, + "clip_ratio/low_mean": 0.0009311280464316951, + "clip_ratio/low_min": 3.9735346945235506e-05, + "clip_ratio/region_mean": 0.002000113898247946, + "epoch": 7.587755102040816, + "grad_norm": 0.13488556444644928, + "learning_rate": 1e-06, + "loss": -0.0107, + "step": 735 + }, + { + "clip_ratio/high_max": 0.0026054226036649197, + "clip_ratio/high_mean": 0.0011152154329465702, + "clip_ratio/low_mean": 0.0009277506087528309, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020429660435183905, + "epoch": 7.597084548104956, + "grad_norm": 0.14065741002559662, + "learning_rate": 1e-06, + "loss": -0.0273, + "step": 736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0523507254464286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4093.0, + "completions/mean_length": 726.693115234375, + "completions/mean_terminated_length": 540.5634155273438, + "completions/min_length": 44.0, + "completions/min_terminated_length": 44.0, + "epoch": 7.606413994169096, + "grad_norm": 0.14061662554740906, + "learning_rate": 1e-06, + "loss": -0.0664, + "num_tokens": 439141330.0, + "reward": 0.6386370062828064, + "reward_std": 0.1572299748659134, + "rewards/simpleverify_reward/mean": 0.6386370062828064, + "rewards/simpleverify_reward/std": 0.4804038405418396, + "step": 737 + }, + { + "clip_ratio/high_max": 0.0021191291889408603, + "clip_ratio/high_mean": 0.000694068457960384, + "clip_ratio/low_mean": 0.0005553743349082652, + "clip_ratio/low_min": 2.4898302399378736e-05, + "clip_ratio/region_mean": 0.0012494427974161226, + "epoch": 7.615743440233236, + "grad_norm": 0.13872262835502625, + "learning_rate": 1e-06, + "loss": -0.0296, + "step": 738 + }, + { + "clip_ratio/high_max": 0.001839825723436661, + "clip_ratio/high_mean": 0.0007257124880197807, + "clip_ratio/low_mean": 0.0005697241695088451, + "clip_ratio/low_min": 1.4695508980366867e-05, + "clip_ratio/region_mean": 0.0012954366466146894, + "epoch": 7.625072886297376, + "grad_norm": 0.1459311693906784, + "learning_rate": 1e-06, + "loss": 0.0056, + "step": 739 + }, + { + "clip_ratio/high_max": 0.0017843237947090529, + "clip_ratio/high_mean": 0.0007224646633403609, + "clip_ratio/low_mean": 0.00046399886696235626, + "clip_ratio/low_min": 1.4387661394721363e-05, + "clip_ratio/region_mean": 0.0011864635271194857, + "epoch": 7.634402332361516, + "grad_norm": 0.13359272480010986, + "learning_rate": 1e-06, + "loss": -0.0104, + "step": 740 + }, + { + "clip_ratio/high_max": 0.0019549584576452617, + "clip_ratio/high_mean": 0.0007238358693939517, + "clip_ratio/low_mean": 0.0005262705772111076, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001250106462975964, + "epoch": 7.643731778425656, + "grad_norm": 0.13564907014369965, + "learning_rate": 1e-06, + "loss": 0.0085, + "step": 741 + }, + { + "clip_ratio/high_max": 0.0019569512478483375, + "clip_ratio/high_mean": 0.0007869689179642592, + "clip_ratio/low_mean": 0.0005607096295534575, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013476785497914534, + "epoch": 7.653061224489796, + "grad_norm": 0.13925661146640778, + "learning_rate": 1e-06, + "loss": -0.013, + "step": 742 + }, + { + "clip_ratio/high_max": 0.002155585403670557, + "clip_ratio/high_mean": 0.0007721753099758644, + "clip_ratio/low_mean": 0.0006401544342224952, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014123297041805927, + "epoch": 7.662390670553936, + "grad_norm": 0.1261594295501709, + "learning_rate": 1e-06, + "loss": 0.0021, + "step": 743 + }, + { + "clip_ratio/high_max": 0.0020438058563740924, + "clip_ratio/high_mean": 0.0007585656567243859, + "clip_ratio/low_mean": 0.000594217734033009, + "clip_ratio/low_min": 5.420122579380404e-05, + "clip_ratio/region_mean": 0.0013527834016713314, + "epoch": 7.671720116618076, + "grad_norm": 0.14886973798274994, + "learning_rate": 1e-06, + "loss": 0.0097, + "step": 744 + }, + { + "clip_ratio/high_max": 0.002141269600542728, + "clip_ratio/high_mean": 0.0007296354851860087, + "clip_ratio/low_mean": 0.0005828428238601191, + "clip_ratio/low_min": 3.678119173855521e-05, + "clip_ratio/region_mean": 0.0013124783035891596, + "epoch": 7.681049562682215, + "grad_norm": 0.1279527097940445, + "learning_rate": 1e-06, + "loss": -0.0107, + "step": 745 + }, + { + "clip_ratio/high_max": 0.0018894861641456373, + "clip_ratio/high_mean": 0.0008073787248576991, + "clip_ratio/low_mean": 0.0006899479803905706, + "clip_ratio/low_min": 1.3778659194940701e-05, + "clip_ratio/region_mean": 0.001497326695243828, + "epoch": 7.690379008746356, + "grad_norm": 0.12833702564239502, + "learning_rate": 1e-06, + "loss": -0.0189, + "step": 746 + }, + { + "clip_ratio/high_max": 0.0021815282816533, + "clip_ratio/high_mean": 0.0008724681774765486, + "clip_ratio/low_mean": 0.00075677987842937, + "clip_ratio/low_min": 6.36046752333641e-05, + "clip_ratio/region_mean": 0.0016292480286210775, + "epoch": 7.699708454810495, + "grad_norm": 0.14017045497894287, + "learning_rate": 1e-06, + "loss": -0.0239, + "step": 747 + }, + { + "clip_ratio/high_max": 0.002242789210868068, + "clip_ratio/high_mean": 0.000980944896582514, + "clip_ratio/low_mean": 0.0006289490929702879, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016098940395750105, + "epoch": 7.709037900874636, + "grad_norm": 0.1378800868988037, + "learning_rate": 1e-06, + "loss": -0.069, + "step": 748 + }, + { + "clip_ratio/high_max": 0.0022026304941391572, + "clip_ratio/high_mean": 0.0008512298409186769, + "clip_ratio/low_mean": 0.0007073326523823198, + "clip_ratio/low_min": 5.845650503033539e-05, + "clip_ratio/region_mean": 0.0015585625224048272, + "epoch": 7.718367346938775, + "grad_norm": 0.15025298297405243, + "learning_rate": 1e-06, + "loss": -0.0259, + "step": 749 + }, + { + "clip_ratio/high_max": 0.0021277290979924146, + "clip_ratio/high_mean": 0.0008715810854482697, + "clip_ratio/low_mean": 0.0008244460714195156, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016960271932475735, + "epoch": 7.727696793002916, + "grad_norm": 0.12647944688796997, + "learning_rate": 1e-06, + "loss": -0.0316, + "step": 750 + }, + { + "clip_ratio/high_max": 0.0019349800022609998, + "clip_ratio/high_mean": 0.0007705838361289352, + "clip_ratio/low_mean": 0.0007449139393429505, + "clip_ratio/low_min": 1.825883737183176e-05, + "clip_ratio/region_mean": 0.0015154977700149175, + "epoch": 7.737026239067055, + "grad_norm": 0.12538565695285797, + "learning_rate": 1e-06, + "loss": -0.0166, + "step": 751 + }, + { + "clip_ratio/high_max": 0.002175618767068954, + "clip_ratio/high_mean": 0.0007867826789151877, + "clip_ratio/low_mean": 0.0006965943339309888, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014833769891993143, + "epoch": 7.746355685131196, + "grad_norm": 0.11999280750751495, + "learning_rate": 1e-06, + "loss": -0.0122, + "step": 752 + }, + { + "clip_ratio/high_max": 0.002342407766263932, + "clip_ratio/high_mean": 0.0008849341065797489, + "clip_ratio/low_mean": 0.0007787324575474486, + "clip_ratio/low_min": 6.030642907717265e-05, + "clip_ratio/region_mean": 0.0016636665386613458, + "epoch": 7.755685131195335, + "grad_norm": 0.12813863158226013, + "learning_rate": 1e-06, + "loss": 0.0155, + "step": 753 + }, + { + "clip_ratio/high_max": 0.002402739835815737, + "clip_ratio/high_mean": 0.0009222707431035815, + "clip_ratio/low_mean": 0.0007032123385215527, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016254830807156395, + "epoch": 7.765014577259475, + "grad_norm": 0.12236373871564865, + "learning_rate": 1e-06, + "loss": -0.0404, + "step": 754 + }, + { + "clip_ratio/high_max": 0.0023693379189353436, + "clip_ratio/high_mean": 0.0009158612647297559, + "clip_ratio/low_mean": 0.0006638583918174845, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001579719697474502, + "epoch": 7.774344023323615, + "grad_norm": 0.14664119482040405, + "learning_rate": 1e-06, + "loss": -0.02, + "step": 755 + }, + { + "clip_ratio/high_max": 0.0024966469463834073, + "clip_ratio/high_mean": 0.0009141220871242695, + "clip_ratio/low_mean": 0.0008772214750933927, + "clip_ratio/low_min": 4.241121860104613e-05, + "clip_ratio/region_mean": 0.0017913435731315985, + "epoch": 7.783673469387755, + "grad_norm": 0.1441788375377655, + "learning_rate": 1e-06, + "loss": -0.0246, + "step": 756 + }, + { + "clip_ratio/high_max": 0.002183746179071022, + "clip_ratio/high_mean": 0.0008900575940060662, + "clip_ratio/low_mean": 0.0007474913682017359, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016375489758502226, + "epoch": 7.793002915451895, + "grad_norm": 0.1311672180891037, + "learning_rate": 1e-06, + "loss": -0.0248, + "step": 757 + }, + { + "clip_ratio/high_max": 0.0023040065098030027, + "clip_ratio/high_mean": 0.0009115982938965317, + "clip_ratio/low_mean": 0.0009188011445075972, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018303994220332243, + "epoch": 7.802332361516035, + "grad_norm": 0.12278234958648682, + "learning_rate": 1e-06, + "loss": -0.0019, + "step": 758 + }, + { + "clip_ratio/high_max": 0.0024888798579922877, + "clip_ratio/high_mean": 0.0009157267086266074, + "clip_ratio/low_mean": 0.0009965313620341476, + "clip_ratio/low_min": 3.1063114874996245e-05, + "clip_ratio/region_mean": 0.0019122580779367127, + "epoch": 7.811661807580175, + "grad_norm": 0.16130737960338593, + "learning_rate": 1e-06, + "loss": -0.0011, + "step": 759 + }, + { + "clip_ratio/high_max": 0.0022073256404837593, + "clip_ratio/high_mean": 0.0008899613239918835, + "clip_ratio/low_mean": 0.0007628938619745895, + "clip_ratio/low_min": 1.6237983800238e-05, + "clip_ratio/region_mean": 0.0016528551605006214, + "epoch": 7.820991253644315, + "grad_norm": 0.14124083518981934, + "learning_rate": 1e-06, + "loss": -0.0324, + "step": 760 + }, + { + "clip_ratio/high_max": 0.0023219711292767897, + "clip_ratio/high_mean": 0.0009657984555815347, + "clip_ratio/low_mean": 0.0008797907903499436, + "clip_ratio/low_min": 2.1922132873442024e-05, + "clip_ratio/region_mean": 0.0018455891840858385, + "epoch": 7.830320699708455, + "grad_norm": 0.13350093364715576, + "learning_rate": 1e-06, + "loss": -0.0612, + "step": 761 + }, + { + "clip_ratio/high_max": 0.0023900306259747595, + "clip_ratio/high_mean": 0.000912744899324025, + "clip_ratio/low_mean": 0.0008640388550702482, + "clip_ratio/low_min": 4.823771450901404e-05, + "clip_ratio/region_mean": 0.0017767837634892203, + "epoch": 7.839650145772595, + "grad_norm": 0.1436462700366974, + "learning_rate": 1e-06, + "loss": 0.0099, + "step": 762 + }, + { + "clip_ratio/high_max": 0.0022774578974349424, + "clip_ratio/high_mean": 0.0009866814325505402, + "clip_ratio/low_mean": 0.0009157908698398387, + "clip_ratio/low_min": 7.198559251264669e-05, + "clip_ratio/region_mean": 0.00190247232967522, + "epoch": 7.848979591836734, + "grad_norm": 0.15024107694625854, + "learning_rate": 1e-06, + "loss": -0.0429, + "step": 763 + }, + { + "clip_ratio/high_max": 0.0024024269805522636, + "clip_ratio/high_mean": 0.0009643874273024267, + "clip_ratio/low_mean": 0.0010378498391219182, + "clip_ratio/low_min": 4.0650407754583284e-05, + "clip_ratio/region_mean": 0.002002237299166154, + "epoch": 7.858309037900875, + "grad_norm": 0.14950677752494812, + "learning_rate": 1e-06, + "loss": 0.0092, + "step": 764 + }, + { + "clip_ratio/high_max": 0.0025013785816554446, + "clip_ratio/high_mean": 0.0010364690988353686, + "clip_ratio/low_mean": 0.0008471385317534441, + "clip_ratio/low_min": 3.644208936748328e-05, + "clip_ratio/region_mean": 0.0018836076342267916, + "epoch": 7.867638483965014, + "grad_norm": 0.1644466072320938, + "learning_rate": 1e-06, + "loss": -0.0366, + "step": 765 + }, + { + "clip_ratio/high_max": 0.002225145770353265, + "clip_ratio/high_mean": 0.0009225417343259323, + "clip_ratio/low_mean": 0.000907451358216349, + "clip_ratio/low_min": 4.641090527002234e-05, + "clip_ratio/region_mean": 0.0018299930889043026, + "epoch": 7.876967930029155, + "grad_norm": 0.14500048756599426, + "learning_rate": 1e-06, + "loss": 0.0122, + "step": 766 + }, + { + "clip_ratio/high_max": 0.0023329905816353858, + "clip_ratio/high_mean": 0.0009116015207837336, + "clip_ratio/low_mean": 0.001174640399767668, + "clip_ratio/low_min": 9.06680324987974e-05, + "clip_ratio/region_mean": 0.0020862419478362426, + "epoch": 7.886297376093294, + "grad_norm": 0.14140664041042328, + "learning_rate": 1e-06, + "loss": 0.0479, + "step": 767 + }, + { + "clip_ratio/high_max": 0.002283850350067951, + "clip_ratio/high_mean": 0.0010036260773631511, + "clip_ratio/low_mean": 0.000938110258175584, + "clip_ratio/low_min": 3.4181022783741355e-05, + "clip_ratio/region_mean": 0.0019417363364482298, + "epoch": 7.895626822157435, + "grad_norm": 0.12925556302070618, + "learning_rate": 1e-06, + "loss": -0.0362, + "step": 768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0570591517857143, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4073.0, + "completions/mean_length": 751.2974853515625, + "completions/mean_terminated_length": 548.903076171875, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "epoch": 8.00932944606414, + "grad_norm": 0.13675914704799652, + "learning_rate": 1e-06, + "loss": -0.0454, + "num_tokens": 456881505.0, + "reward": 0.6374163031578064, + "reward_std": 0.16118381917476654, + "rewards/simpleverify_reward/mean": 0.6374163031578064, + "rewards/simpleverify_reward/std": 0.48075446486473083, + "step": 769 + }, + { + "clip_ratio/high_max": 0.0016781171871116385, + "clip_ratio/high_mean": 0.0006622902747039916, + "clip_ratio/low_mean": 0.0004056672162278119, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010679574879759457, + "epoch": 8.018658892128279, + "grad_norm": 0.12612979114055634, + "learning_rate": 1e-06, + "loss": -0.0346, + "step": 770 + }, + { + "clip_ratio/high_max": 0.0017111044380726526, + "clip_ratio/high_mean": 0.0006947221700102091, + "clip_ratio/low_mean": 0.0003641227492607868, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010588449295028113, + "epoch": 8.02798833819242, + "grad_norm": 0.13261425495147705, + "learning_rate": 1e-06, + "loss": -0.0395, + "step": 771 + }, + { + "clip_ratio/high_max": 0.0014102601453487296, + "clip_ratio/high_mean": 0.0006350980161187181, + "clip_ratio/low_mean": 0.0005383820771385217, + "clip_ratio/low_min": 1.190249440696789e-05, + "clip_ratio/region_mean": 0.0011734801119018812, + "epoch": 8.03731778425656, + "grad_norm": 0.1318708062171936, + "learning_rate": 1e-06, + "loss": -0.0049, + "step": 772 + }, + { + "clip_ratio/high_max": 0.001980179651582148, + "clip_ratio/high_mean": 0.000854958598210942, + "clip_ratio/low_mean": 0.0005672808019880904, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014222394020180218, + "epoch": 8.0466472303207, + "grad_norm": 0.13942308723926544, + "learning_rate": 1e-06, + "loss": -0.0381, + "step": 773 + }, + { + "clip_ratio/high_max": 0.0018664007293409668, + "clip_ratio/high_mean": 0.0008455624174530385, + "clip_ratio/low_mean": 0.0006330710912152426, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014786335341341328, + "epoch": 8.055976676384839, + "grad_norm": 0.16271492838859558, + "learning_rate": 1e-06, + "loss": -0.0264, + "step": 774 + }, + { + "clip_ratio/high_max": 0.0018644443043740466, + "clip_ratio/high_mean": 0.0007480310832761461, + "clip_ratio/low_mean": 0.0005536467474485107, + "clip_ratio/low_min": 2.112557012878824e-05, + "clip_ratio/region_mean": 0.001301677868468687, + "epoch": 8.06530612244898, + "grad_norm": 0.12925513088703156, + "learning_rate": 1e-06, + "loss": -0.0489, + "step": 775 + }, + { + "clip_ratio/high_max": 0.0023161625758802984, + "clip_ratio/high_mean": 0.0009264021828130353, + "clip_ratio/low_mean": 0.0006490904725069413, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015754926898807753, + "epoch": 8.07463556851312, + "grad_norm": 0.13870471715927124, + "learning_rate": 1e-06, + "loss": -0.033, + "step": 776 + }, + { + "clip_ratio/high_max": 0.0019738978226087056, + "clip_ratio/high_mean": 0.0007244862554216525, + "clip_ratio/low_mean": 0.0007899869851826224, + "clip_ratio/low_min": 6.817312169005163e-05, + "clip_ratio/region_mean": 0.0015144732169574127, + "epoch": 8.08396501457726, + "grad_norm": 0.12384528666734695, + "learning_rate": 1e-06, + "loss": 0.0128, + "step": 777 + }, + { + "clip_ratio/high_max": 0.0023334121251537, + "clip_ratio/high_mean": 0.0008317334923049202, + "clip_ratio/low_mean": 0.0006092061266826931, + "clip_ratio/low_min": 1.4541647033183835e-05, + "clip_ratio/region_mean": 0.0014409396462724544, + "epoch": 8.093294460641399, + "grad_norm": 0.1514134556055069, + "learning_rate": 1e-06, + "loss": -0.0476, + "step": 778 + }, + { + "clip_ratio/high_max": 0.00216744260978885, + "clip_ratio/high_mean": 0.0009052204914041795, + "clip_ratio/low_mean": 0.0006727804830006789, + "clip_ratio/low_min": 2.3656320990994573e-05, + "clip_ratio/region_mean": 0.00157800099987071, + "epoch": 8.102623906705539, + "grad_norm": 0.19655881822109222, + "learning_rate": 1e-06, + "loss": -0.0622, + "step": 779 + }, + { + "clip_ratio/high_max": 0.0019985899634775706, + "clip_ratio/high_mean": 0.0008344051639141981, + "clip_ratio/low_mean": 0.0007848602344893152, + "clip_ratio/low_min": 1.8502072634873912e-05, + "clip_ratio/region_mean": 0.0016192654002225026, + "epoch": 8.11195335276968, + "grad_norm": 0.1499754637479782, + "learning_rate": 1e-06, + "loss": 0.0097, + "step": 780 + }, + { + "clip_ratio/high_max": 0.001745296161971055, + "clip_ratio/high_mean": 0.0007103092029865365, + "clip_ratio/low_mean": 0.0007737592368357582, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014840684198134113, + "epoch": 8.12128279883382, + "grad_norm": 0.12729890644550323, + "learning_rate": 1e-06, + "loss": -0.0037, + "step": 781 + }, + { + "clip_ratio/high_max": 0.0021704562386730686, + "clip_ratio/high_mean": 0.0008493876994180027, + "clip_ratio/low_mean": 0.0008668792506796308, + "clip_ratio/low_min": 3.071253013331443e-05, + "clip_ratio/region_mean": 0.00171626696101157, + "epoch": 8.130612244897959, + "grad_norm": 0.1464262455701828, + "learning_rate": 1e-06, + "loss": -0.0003, + "step": 782 + }, + { + "clip_ratio/high_max": 0.00218276544183027, + "clip_ratio/high_mean": 0.0009033630940393778, + "clip_ratio/low_mean": 0.0007663823653274449, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016697454775567167, + "epoch": 8.139941690962099, + "grad_norm": 0.15434731543064117, + "learning_rate": 1e-06, + "loss": -0.0201, + "step": 783 + }, + { + "clip_ratio/high_max": 0.0021151314649614505, + "clip_ratio/high_mean": 0.0007573006714665098, + "clip_ratio/low_mean": 0.000899002931873838, + "clip_ratio/low_min": 9.366901849716669e-05, + "clip_ratio/region_mean": 0.0016563036188017577, + "epoch": 8.14927113702624, + "grad_norm": 0.27759093046188354, + "learning_rate": 1e-06, + "loss": 0.0238, + "step": 784 + }, + { + "clip_ratio/high_max": 0.002407892912742682, + "clip_ratio/high_mean": 0.0009699102156446315, + "clip_ratio/low_mean": 0.0008394020132982405, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018093122307618614, + "epoch": 8.15860058309038, + "grad_norm": 0.12997399270534515, + "learning_rate": 1e-06, + "loss": -0.0494, + "step": 785 + }, + { + "clip_ratio/high_max": 0.0022966804062889423, + "clip_ratio/high_mean": 0.0009885221370495856, + "clip_ratio/low_mean": 0.0008676420920892269, + "clip_ratio/low_min": 6.781096453778446e-05, + "clip_ratio/region_mean": 0.0018561642282293178, + "epoch": 8.167930029154519, + "grad_norm": 0.12709791958332062, + "learning_rate": 1e-06, + "loss": -0.0519, + "step": 786 + }, + { + "clip_ratio/high_max": 0.0021771273750346154, + "clip_ratio/high_mean": 0.0009113561172853224, + "clip_ratio/low_mean": 0.0008371757976419758, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017485319331171922, + "epoch": 8.177259475218658, + "grad_norm": 0.19155512750148773, + "learning_rate": 1e-06, + "loss": -0.0444, + "step": 787 + }, + { + "clip_ratio/high_max": 0.001957065673195757, + "clip_ratio/high_mean": 0.0007083869313646574, + "clip_ratio/low_mean": 0.0008589770914113615, + "clip_ratio/low_min": 5.75090161873959e-05, + "clip_ratio/region_mean": 0.0015673640082241036, + "epoch": 8.186588921282798, + "grad_norm": 0.1266499161720276, + "learning_rate": 1e-06, + "loss": 0.0401, + "step": 788 + }, + { + "clip_ratio/high_max": 0.0024794062628643587, + "clip_ratio/high_mean": 0.0008836291708576027, + "clip_ratio/low_mean": 0.0009431578437215649, + "clip_ratio/low_min": 3.724418820638675e-05, + "clip_ratio/region_mean": 0.0018267870036652312, + "epoch": 8.19591836734694, + "grad_norm": 0.1369238942861557, + "learning_rate": 1e-06, + "loss": -0.0126, + "step": 789 + }, + { + "clip_ratio/high_max": 0.0021670952482963912, + "clip_ratio/high_mean": 0.0008826126722851768, + "clip_ratio/low_mean": 0.0008767210147198057, + "clip_ratio/low_min": 1.9060689737671055e-05, + "clip_ratio/region_mean": 0.00175933372520376, + "epoch": 8.205247813411079, + "grad_norm": 0.13367097079753876, + "learning_rate": 1e-06, + "loss": -0.0295, + "step": 790 + }, + { + "clip_ratio/high_max": 0.0023366394598269835, + "clip_ratio/high_mean": 0.001017025560940965, + "clip_ratio/low_mean": 0.0008878636945155449, + "clip_ratio/low_min": 1.8701375665841624e-05, + "clip_ratio/region_mean": 0.0019048892863793299, + "epoch": 8.214577259475218, + "grad_norm": 0.14439278841018677, + "learning_rate": 1e-06, + "loss": -0.008, + "step": 791 + }, + { + "clip_ratio/high_max": 0.00213179298225441, + "clip_ratio/high_mean": 0.0008897867573978147, + "clip_ratio/low_mean": 0.0009545487991999835, + "clip_ratio/low_min": 4.5175278501119465e-05, + "clip_ratio/region_mean": 0.0018443355584167875, + "epoch": 8.223906705539358, + "grad_norm": 0.1411471962928772, + "learning_rate": 1e-06, + "loss": 0.0154, + "step": 792 + }, + { + "clip_ratio/high_max": 0.0025781976110010874, + "clip_ratio/high_mean": 0.0010088125764013967, + "clip_ratio/low_mean": 0.0006651837893514312, + "clip_ratio/low_min": 2.7734635295928456e-05, + "clip_ratio/region_mean": 0.001673996368481312, + "epoch": 8.2332361516035, + "grad_norm": 0.1256917268037796, + "learning_rate": 1e-06, + "loss": -0.0382, + "step": 793 + }, + { + "clip_ratio/high_max": 0.002320486633834662, + "clip_ratio/high_mean": 0.0009754740822245367, + "clip_ratio/low_mean": 0.0007639980030944571, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017394720598531421, + "epoch": 8.242565597667639, + "grad_norm": 0.1294924020767212, + "learning_rate": 1e-06, + "loss": -0.0585, + "step": 794 + }, + { + "clip_ratio/high_max": 0.0025719086042954586, + "clip_ratio/high_mean": 0.0011508624993439298, + "clip_ratio/low_mean": 0.0007402381897918531, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001891100691864267, + "epoch": 8.251895043731778, + "grad_norm": 0.14637455344200134, + "learning_rate": 1e-06, + "loss": -0.0483, + "step": 795 + }, + { + "clip_ratio/high_max": 0.0021692998343496583, + "clip_ratio/high_mean": 0.0009030763594637392, + "clip_ratio/low_mean": 0.000868905979586998, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017719823736115359, + "epoch": 8.261224489795918, + "grad_norm": 0.12796564400196075, + "learning_rate": 1e-06, + "loss": -0.0128, + "step": 796 + }, + { + "clip_ratio/high_max": 0.002855377344531007, + "clip_ratio/high_mean": 0.0010504756482987432, + "clip_ratio/low_mean": 0.0009795163714443333, + "clip_ratio/low_min": 0.00010217772432952188, + "clip_ratio/region_mean": 0.0020299920142861083, + "epoch": 8.270553935860057, + "grad_norm": 0.14632874727249146, + "learning_rate": 1e-06, + "loss": 0.0078, + "step": 797 + }, + { + "clip_ratio/high_max": 0.0026055614071083255, + "clip_ratio/high_mean": 0.0009758277210494271, + "clip_ratio/low_mean": 0.0008346058320967131, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018104335467796773, + "epoch": 8.279883381924199, + "grad_norm": 0.14142385125160217, + "learning_rate": 1e-06, + "loss": -0.0234, + "step": 798 + }, + { + "clip_ratio/high_max": 0.0023912920150905848, + "clip_ratio/high_mean": 0.0010060902204713784, + "clip_ratio/low_mean": 0.0007692072795180138, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017752974963514134, + "epoch": 8.289212827988338, + "grad_norm": 0.13493581116199493, + "learning_rate": 1e-06, + "loss": -0.0447, + "step": 799 + }, + { + "clip_ratio/high_max": 0.0025832414685282856, + "clip_ratio/high_mean": 0.0009292196482419968, + "clip_ratio/low_mean": 0.0010997168410540326, + "clip_ratio/low_min": 5.557256918109488e-05, + "clip_ratio/region_mean": 0.002028936447459273, + "epoch": 8.298542274052478, + "grad_norm": 0.1228645071387291, + "learning_rate": 1e-06, + "loss": 0.007, + "step": 800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0580008370535714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4072.0, + "completions/mean_length": 747.948974609375, + "completions/mean_terminated_length": 541.802490234375, + "completions/min_length": 95.0, + "completions/min_terminated_length": 95.0, + "epoch": 8.307871720116617, + "grad_norm": 0.1516033560037613, + "learning_rate": 1e-06, + "loss": -0.0105, + "num_tokens": 474366680.0, + "reward": 0.6474260687828064, + "reward_std": 0.1622728705406189, + "rewards/simpleverify_reward/mean": 0.6474260687828064, + "rewards/simpleverify_reward/std": 0.4777797758579254, + "step": 801 + }, + { + "clip_ratio/high_max": 0.0017537986641400494, + "clip_ratio/high_mean": 0.0006630350926570827, + "clip_ratio/low_mean": 0.00047387506856466644, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011369101484888233, + "epoch": 8.317201166180759, + "grad_norm": 0.14466431736946106, + "learning_rate": 1e-06, + "loss": 0.0116, + "step": 802 + }, + { + "clip_ratio/high_max": 0.0019687302847160026, + "clip_ratio/high_mean": 0.0008052143439272186, + "clip_ratio/low_mean": 0.0005499929629877442, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013552073251048569, + "epoch": 8.326530612244898, + "grad_norm": 0.1408139318227768, + "learning_rate": 1e-06, + "loss": -0.0174, + "step": 803 + }, + { + "clip_ratio/high_max": 0.002125952061760472, + "clip_ratio/high_mean": 0.0007655736026208615, + "clip_ratio/low_mean": 0.00043082632464575, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011963999022555072, + "epoch": 8.335860058309038, + "grad_norm": 0.12387403845787048, + "learning_rate": 1e-06, + "loss": -0.0215, + "step": 804 + }, + { + "clip_ratio/high_max": 0.0019115101858915295, + "clip_ratio/high_mean": 0.0008138671455526492, + "clip_ratio/low_mean": 0.0005153647325641941, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001329231894487748, + "epoch": 8.345189504373177, + "grad_norm": 0.13465812802314758, + "learning_rate": 1e-06, + "loss": -0.0301, + "step": 805 + }, + { + "clip_ratio/high_max": 0.0018984133057529107, + "clip_ratio/high_mean": 0.0007280560739673092, + "clip_ratio/low_mean": 0.0006429082936847408, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013709643790207338, + "epoch": 8.354518950437317, + "grad_norm": 0.14861463010311127, + "learning_rate": 1e-06, + "loss": 0.0125, + "step": 806 + }, + { + "clip_ratio/high_max": 0.0021903294109506533, + "clip_ratio/high_mean": 0.00087721385898476, + "clip_ratio/low_mean": 0.0005823174324177671, + "clip_ratio/low_min": 1.0235833542537875e-05, + "clip_ratio/region_mean": 0.0014595313004974741, + "epoch": 8.363848396501458, + "grad_norm": 0.13971786201000214, + "learning_rate": 1e-06, + "loss": -0.0437, + "step": 807 + }, + { + "clip_ratio/high_max": 0.0019012930497410707, + "clip_ratio/high_mean": 0.00080004764640762, + "clip_ratio/low_mean": 0.0007180351167335175, + "clip_ratio/low_min": 6.560621386597631e-05, + "clip_ratio/region_mean": 0.0015180827758740634, + "epoch": 8.373177842565598, + "grad_norm": 0.1959521472454071, + "learning_rate": 1e-06, + "loss": 0.0021, + "step": 808 + }, + { + "clip_ratio/high_max": 0.0021668398039764725, + "clip_ratio/high_mean": 0.0008851054335536901, + "clip_ratio/low_mean": 0.0007341410037042806, + "clip_ratio/low_min": 2.4323799152625725e-05, + "clip_ratio/region_mean": 0.0016192464827327058, + "epoch": 8.382507288629737, + "grad_norm": 0.16336694359779358, + "learning_rate": 1e-06, + "loss": -0.0438, + "step": 809 + }, + { + "clip_ratio/high_max": 0.002415544498944655, + "clip_ratio/high_mean": 0.0009118056823353982, + "clip_ratio/low_mean": 0.0005797651588181907, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014915708525222726, + "epoch": 8.391836734693877, + "grad_norm": 0.1311698853969574, + "learning_rate": 1e-06, + "loss": -0.0214, + "step": 810 + }, + { + "clip_ratio/high_max": 0.0023332574783125892, + "clip_ratio/high_mean": 0.0008412513579969527, + "clip_ratio/low_mean": 0.0006205119052538066, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014617633059970103, + "epoch": 8.401166180758018, + "grad_norm": 0.1968589723110199, + "learning_rate": 1e-06, + "loss": -0.025, + "step": 811 + }, + { + "clip_ratio/high_max": 0.0020871867855021264, + "clip_ratio/high_mean": 0.0008100756458588876, + "clip_ratio/low_mean": 0.0009123428335442441, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017224184848600999, + "epoch": 8.410495626822158, + "grad_norm": 0.17827925086021423, + "learning_rate": 1e-06, + "loss": 0.0001, + "step": 812 + }, + { + "clip_ratio/high_max": 0.0022598059367737733, + "clip_ratio/high_mean": 0.0009513376862742007, + "clip_ratio/low_mean": 0.000689091301865119, + "clip_ratio/low_min": 3.802284572884673e-05, + "clip_ratio/region_mean": 0.0016404290072387084, + "epoch": 8.419825072886297, + "grad_norm": 0.12976522743701935, + "learning_rate": 1e-06, + "loss": -0.0408, + "step": 813 + }, + { + "clip_ratio/high_max": 0.002414627371763345, + "clip_ratio/high_mean": 0.0009501463482592953, + "clip_ratio/low_mean": 0.0007094226825756778, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016595690321992151, + "epoch": 8.429154518950437, + "grad_norm": 0.13414177298545837, + "learning_rate": 1e-06, + "loss": -0.0379, + "step": 814 + }, + { + "clip_ratio/high_max": 0.0020716543585876934, + "clip_ratio/high_mean": 0.0009414587057108292, + "clip_ratio/low_mean": 0.0007051264256006107, + "clip_ratio/low_min": 2.5699013349367306e-05, + "clip_ratio/region_mean": 0.0016465851222164929, + "epoch": 8.438483965014576, + "grad_norm": 0.131557434797287, + "learning_rate": 1e-06, + "loss": -0.0194, + "step": 815 + }, + { + "clip_ratio/high_max": 0.0020102107628190424, + "clip_ratio/high_mean": 0.000899550527719839, + "clip_ratio/low_mean": 0.0007386505203612614, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016382010471716058, + "epoch": 8.447813411078718, + "grad_norm": 0.14207573235034943, + "learning_rate": 1e-06, + "loss": -0.0376, + "step": 816 + }, + { + "clip_ratio/high_max": 0.0020864536418230273, + "clip_ratio/high_mean": 0.0008691776092746295, + "clip_ratio/low_mean": 0.0009405826713191345, + "clip_ratio/low_min": 1.6648908058414236e-05, + "clip_ratio/region_mean": 0.001809760280593764, + "epoch": 8.457142857142857, + "grad_norm": 0.13197103142738342, + "learning_rate": 1e-06, + "loss": -0.0235, + "step": 817 + }, + { + "clip_ratio/high_max": 0.0022347010344674345, + "clip_ratio/high_mean": 0.0008625889913673745, + "clip_ratio/low_mean": 0.0008217213689931668, + "clip_ratio/low_min": 3.0652281566290185e-05, + "clip_ratio/region_mean": 0.0016843103876453824, + "epoch": 8.466472303206997, + "grad_norm": 0.1277029812335968, + "learning_rate": 1e-06, + "loss": -0.0221, + "step": 818 + }, + { + "clip_ratio/high_max": 0.0024130923120537773, + "clip_ratio/high_mean": 0.001104865896195406, + "clip_ratio/low_mean": 0.0008431018450210104, + "clip_ratio/low_min": 1.2138279089413118e-05, + "clip_ratio/region_mean": 0.001947967779415194, + "epoch": 8.475801749271136, + "grad_norm": 0.15858964622020721, + "learning_rate": 1e-06, + "loss": -0.0702, + "step": 819 + }, + { + "clip_ratio/high_max": 0.0026420907088322565, + "clip_ratio/high_mean": 0.0010906054685619893, + "clip_ratio/low_mean": 0.000783914281782927, + "clip_ratio/low_min": 1.3368983672989998e-05, + "clip_ratio/region_mean": 0.0018745197376119904, + "epoch": 8.485131195335278, + "grad_norm": 0.15172161161899567, + "learning_rate": 1e-06, + "loss": -0.0132, + "step": 820 + }, + { + "clip_ratio/high_max": 0.002111362449795706, + "clip_ratio/high_mean": 0.0009811183545025415, + "clip_ratio/low_mean": 0.0009079625669983216, + "clip_ratio/low_min": 1.2178487850178499e-05, + "clip_ratio/region_mean": 0.0018890809296863154, + "epoch": 8.494460641399417, + "grad_norm": 0.14209824800491333, + "learning_rate": 1e-06, + "loss": -0.0104, + "step": 821 + }, + { + "clip_ratio/high_max": 0.0021146593426237814, + "clip_ratio/high_mean": 0.0009244757911801571, + "clip_ratio/low_mean": 0.0008680353193994961, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017925111278600525, + "epoch": 8.503790087463557, + "grad_norm": 0.13806912302970886, + "learning_rate": 1e-06, + "loss": 0.0008, + "step": 822 + }, + { + "clip_ratio/high_max": 0.001988493932003621, + "clip_ratio/high_mean": 0.0008030673452594783, + "clip_ratio/low_mean": 0.0009028253880387638, + "clip_ratio/low_min": 4.0526813791075256e-05, + "clip_ratio/region_mean": 0.0017058927405741997, + "epoch": 8.513119533527696, + "grad_norm": 0.13951145112514496, + "learning_rate": 1e-06, + "loss": 0.0021, + "step": 823 + }, + { + "clip_ratio/high_max": 0.0024955428452813067, + "clip_ratio/high_mean": 0.0009691582217783434, + "clip_ratio/low_mean": 0.0009108888134505833, + "clip_ratio/low_min": 4.585593705996871e-05, + "clip_ratio/region_mean": 0.0018800470352289267, + "epoch": 8.522448979591836, + "grad_norm": 0.14607743918895721, + "learning_rate": 1e-06, + "loss": -0.0383, + "step": 824 + }, + { + "clip_ratio/high_max": 0.00227100537813385, + "clip_ratio/high_mean": 0.0009037887102749664, + "clip_ratio/low_mean": 0.0007138600303733256, + "clip_ratio/low_min": 2.584780850156676e-05, + "clip_ratio/region_mean": 0.0016176487442862708, + "epoch": 8.531778425655977, + "grad_norm": 0.12231608480215073, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 825 + }, + { + "clip_ratio/high_max": 0.002586353599326685, + "clip_ratio/high_mean": 0.001039758000842994, + "clip_ratio/low_mean": 0.0008937632101151394, + "clip_ratio/low_min": 3.640069917310029e-05, + "clip_ratio/region_mean": 0.0019335211982252076, + "epoch": 8.541107871720117, + "grad_norm": 0.13818369805812836, + "learning_rate": 1e-06, + "loss": -0.0298, + "step": 826 + }, + { + "clip_ratio/high_max": 0.0021612934942822903, + "clip_ratio/high_mean": 0.0010622190238791518, + "clip_ratio/low_mean": 0.0010118979116668925, + "clip_ratio/low_min": 4.829984391108155e-05, + "clip_ratio/region_mean": 0.002074116862786468, + "epoch": 8.550437317784256, + "grad_norm": 0.1717173010110855, + "learning_rate": 1e-06, + "loss": -0.0393, + "step": 827 + }, + { + "clip_ratio/high_max": 0.0023573423459311016, + "clip_ratio/high_mean": 0.0009583396094967611, + "clip_ratio/low_mean": 0.0009784350786503637, + "clip_ratio/low_min": 7.57310390326893e-05, + "clip_ratio/region_mean": 0.0019367746681382414, + "epoch": 8.559766763848396, + "grad_norm": 0.17738987505435944, + "learning_rate": 1e-06, + "loss": 0.0059, + "step": 828 + }, + { + "clip_ratio/high_max": 0.0025482142955297604, + "clip_ratio/high_mean": 0.000998525614704704, + "clip_ratio/low_mean": 0.0008901449473341927, + "clip_ratio/low_min": 2.2599891963182017e-05, + "clip_ratio/region_mean": 0.0018886705656768754, + "epoch": 8.569096209912537, + "grad_norm": 0.14093036949634552, + "learning_rate": 1e-06, + "loss": -0.0091, + "step": 829 + }, + { + "clip_ratio/high_max": 0.0021053235614090227, + "clip_ratio/high_mean": 0.0009183491893054452, + "clip_ratio/low_mean": 0.000790588781455881, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017089379689423367, + "epoch": 8.578425655976677, + "grad_norm": 0.126109778881073, + "learning_rate": 1e-06, + "loss": -0.0194, + "step": 830 + }, + { + "clip_ratio/high_max": 0.002523310438846238, + "clip_ratio/high_mean": 0.0009514377707091626, + "clip_ratio/low_mean": 0.0009702378229121678, + "clip_ratio/low_min": 4.616568094206741e-05, + "clip_ratio/region_mean": 0.001921675582707394, + "epoch": 8.587755102040816, + "grad_norm": 0.14047396183013916, + "learning_rate": 1e-06, + "loss": -0.0247, + "step": 831 + }, + { + "clip_ratio/high_max": 0.003317179507575929, + "clip_ratio/high_mean": 0.0011828214010165539, + "clip_ratio/low_mean": 0.000908896574856044, + "clip_ratio/low_min": 5.2624192903749645e-05, + "clip_ratio/region_mean": 0.002091717913572211, + "epoch": 8.597084548104956, + "grad_norm": 0.14829492568969727, + "learning_rate": 1e-06, + "loss": -0.0135, + "step": 832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0633370535714286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4092.0, + "completions/mean_length": 771.0543212890625, + "completions/mean_terminated_length": 546.2217407226562, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 8.606413994169095, + "grad_norm": 0.1405651569366455, + "learning_rate": 1e-06, + "loss": -0.0467, + "num_tokens": 491854571.0, + "reward": 0.6417410969734192, + "reward_std": 0.15736746788024902, + "rewards/simpleverify_reward/mean": 0.6417410969734192, + "rewards/simpleverify_reward/std": 0.47949716448783875, + "step": 833 + }, + { + "clip_ratio/high_max": 0.0020139113257755525, + "clip_ratio/high_mean": 0.0008633421166450717, + "clip_ratio/low_mean": 0.00046989334896352375, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001333235461061122, + "epoch": 8.615743440233237, + "grad_norm": 0.13367526233196259, + "learning_rate": 1e-06, + "loss": -0.0218, + "step": 834 + }, + { + "clip_ratio/high_max": 0.002141184486390557, + "clip_ratio/high_mean": 0.0008294206363643752, + "clip_ratio/low_mean": 0.00038089149211373297, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012103121262043715, + "epoch": 8.625072886297376, + "grad_norm": 0.13916561007499695, + "learning_rate": 1e-06, + "loss": -0.0267, + "step": 835 + }, + { + "clip_ratio/high_max": 0.001740491181408288, + "clip_ratio/high_mean": 0.0007440051322191721, + "clip_ratio/low_mean": 0.0005984672025078908, + "clip_ratio/low_min": 4.800307215191424e-05, + "clip_ratio/region_mean": 0.001342472358373925, + "epoch": 8.634402332361516, + "grad_norm": 0.15485632419586182, + "learning_rate": 1e-06, + "loss": -0.0397, + "step": 836 + }, + { + "clip_ratio/high_max": 0.002032392942055594, + "clip_ratio/high_mean": 0.0007078141152305761, + "clip_ratio/low_mean": 0.0005370698117985739, + "clip_ratio/low_min": 0.0001028032766043907, + "clip_ratio/region_mean": 0.0012448839224816766, + "epoch": 8.643731778425655, + "grad_norm": 0.13908182084560394, + "learning_rate": 1e-06, + "loss": -0.0203, + "step": 837 + }, + { + "clip_ratio/high_max": 0.0017979219264816493, + "clip_ratio/high_mean": 0.0007961944247654174, + "clip_ratio/low_mean": 0.0005261639134914731, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013223583191575017, + "epoch": 8.653061224489797, + "grad_norm": 0.12595003843307495, + "learning_rate": 1e-06, + "loss": -0.0343, + "step": 838 + }, + { + "clip_ratio/high_max": 0.002292386212502606, + "clip_ratio/high_mean": 0.0009561050537740812, + "clip_ratio/low_mean": 0.0005483322347572539, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015044372557895258, + "epoch": 8.662390670553936, + "grad_norm": 0.14232099056243896, + "learning_rate": 1e-06, + "loss": -0.0391, + "step": 839 + }, + { + "clip_ratio/high_max": 0.001992901903577149, + "clip_ratio/high_mean": 0.0009374034398206277, + "clip_ratio/low_mean": 0.0004780116523761535, + "clip_ratio/low_min": 1.5972400433383882e-05, + "clip_ratio/region_mean": 0.0014154150921967812, + "epoch": 8.671720116618076, + "grad_norm": 0.17381148040294647, + "learning_rate": 1e-06, + "loss": -0.0247, + "step": 840 + }, + { + "clip_ratio/high_max": 0.0020656290325860027, + "clip_ratio/high_mean": 0.0008125649655994494, + "clip_ratio/low_mean": 0.0005825711668876465, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013951361397630535, + "epoch": 8.681049562682215, + "grad_norm": 0.1460714489221573, + "learning_rate": 1e-06, + "loss": 0.0085, + "step": 841 + }, + { + "clip_ratio/high_max": 0.0019548289055819623, + "clip_ratio/high_mean": 0.0007950219624035526, + "clip_ratio/low_mean": 0.0007099590866346261, + "clip_ratio/low_min": 2.2587639250559732e-05, + "clip_ratio/region_mean": 0.0015049810644995887, + "epoch": 8.690379008746355, + "grad_norm": 0.15236659348011017, + "learning_rate": 1e-06, + "loss": -0.0208, + "step": 842 + }, + { + "clip_ratio/high_max": 0.0020667382632382214, + "clip_ratio/high_mean": 0.0007644728430022951, + "clip_ratio/low_mean": 0.0007049965988699114, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014694694291392807, + "epoch": 8.699708454810496, + "grad_norm": 0.13557924330234528, + "learning_rate": 1e-06, + "loss": 0.0126, + "step": 843 + }, + { + "clip_ratio/high_max": 0.0024244647356681526, + "clip_ratio/high_mean": 0.0009140740894508781, + "clip_ratio/low_mean": 0.0006336965607260936, + "clip_ratio/low_min": 2.9460286896210164e-05, + "clip_ratio/region_mean": 0.0015477706692763604, + "epoch": 8.709037900874636, + "grad_norm": 0.12969401478767395, + "learning_rate": 1e-06, + "loss": -0.0196, + "step": 844 + }, + { + "clip_ratio/high_max": 0.001969263033970492, + "clip_ratio/high_mean": 0.0007806028243066976, + "clip_ratio/low_mean": 0.0006583489393960917, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001438951752788853, + "epoch": 8.718367346938775, + "grad_norm": 0.12705589830875397, + "learning_rate": 1e-06, + "loss": -0.0223, + "step": 845 + }, + { + "clip_ratio/high_max": 0.0023710055902483873, + "clip_ratio/high_mean": 0.0009322769874415826, + "clip_ratio/low_mean": 0.0007197825716502848, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016520596036571078, + "epoch": 8.727696793002915, + "grad_norm": 0.14861077070236206, + "learning_rate": 1e-06, + "loss": -0.0123, + "step": 846 + }, + { + "clip_ratio/high_max": 0.002109671004291158, + "clip_ratio/high_mean": 0.0008622139730505296, + "clip_ratio/low_mean": 0.0007059752788336482, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015681892873544712, + "epoch": 8.737026239067056, + "grad_norm": 0.13615943491458893, + "learning_rate": 1e-06, + "loss": -0.0244, + "step": 847 + }, + { + "clip_ratio/high_max": 0.0022711220517521724, + "clip_ratio/high_mean": 0.0009589893797965487, + "clip_ratio/low_mean": 0.0008089785569609376, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017679679294815287, + "epoch": 8.746355685131196, + "grad_norm": 0.14134782552719116, + "learning_rate": 1e-06, + "loss": -0.0467, + "step": 848 + }, + { + "clip_ratio/high_max": 0.0025540151946188416, + "clip_ratio/high_mean": 0.0009891991085169138, + "clip_ratio/low_mean": 0.0007917930579424137, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017809922064770944, + "epoch": 8.755685131195335, + "grad_norm": 0.13083255290985107, + "learning_rate": 1e-06, + "loss": -0.0139, + "step": 849 + }, + { + "clip_ratio/high_max": 0.0023866647825343534, + "clip_ratio/high_mean": 0.0009160980953311082, + "clip_ratio/low_mean": 0.0006603442161576822, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001576442340592621, + "epoch": 8.765014577259475, + "grad_norm": 0.1456131786108017, + "learning_rate": 1e-06, + "loss": -0.0592, + "step": 850 + }, + { + "clip_ratio/high_max": 0.002494656575436238, + "clip_ratio/high_mean": 0.0009352633715025149, + "clip_ratio/low_mean": 0.0007715230840403819, + "clip_ratio/low_min": 2.8331822250038385e-05, + "clip_ratio/region_mean": 0.0017067864682758227, + "epoch": 8.774344023323614, + "grad_norm": 0.12012716382741928, + "learning_rate": 1e-06, + "loss": 0.0012, + "step": 851 + }, + { + "clip_ratio/high_max": 0.0022803933534305543, + "clip_ratio/high_mean": 0.0009966295747290133, + "clip_ratio/low_mean": 0.0007608390424138634, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017574685989529826, + "epoch": 8.783673469387756, + "grad_norm": 0.1424596756696701, + "learning_rate": 1e-06, + "loss": -0.0378, + "step": 852 + }, + { + "clip_ratio/high_max": 0.002275281192851253, + "clip_ratio/high_mean": 0.0009698879621282686, + "clip_ratio/low_mean": 0.000846161432491499, + "clip_ratio/low_min": 1.1913839443877805e-05, + "clip_ratio/region_mean": 0.0018160494291805662, + "epoch": 8.793002915451895, + "grad_norm": 0.13369524478912354, + "learning_rate": 1e-06, + "loss": -0.04, + "step": 853 + }, + { + "clip_ratio/high_max": 0.0024288226341013797, + "clip_ratio/high_mean": 0.0010673616961867083, + "clip_ratio/low_mean": 0.000930726502701873, + "clip_ratio/low_min": 4.8840844101505354e-05, + "clip_ratio/region_mean": 0.001998088257096242, + "epoch": 8.802332361516035, + "grad_norm": 0.12861855328083038, + "learning_rate": 1e-06, + "loss": -0.031, + "step": 854 + }, + { + "clip_ratio/high_max": 0.002092519549478311, + "clip_ratio/high_mean": 0.0008946466059569502, + "clip_ratio/low_mean": 0.00072539451252851, + "clip_ratio/low_min": 2.8026905056321993e-05, + "clip_ratio/region_mean": 0.001620041140995454, + "epoch": 8.811661807580174, + "grad_norm": 0.1658993512392044, + "learning_rate": 1e-06, + "loss": -0.0357, + "step": 855 + }, + { + "clip_ratio/high_max": 0.002359290658205282, + "clip_ratio/high_mean": 0.0009629812084313016, + "clip_ratio/low_mean": 0.0009085323217732366, + "clip_ratio/low_min": 3.4712578781181946e-05, + "clip_ratio/region_mean": 0.0018715135083766654, + "epoch": 8.820991253644316, + "grad_norm": 0.16082775592803955, + "learning_rate": 1e-06, + "loss": -0.006, + "step": 856 + }, + { + "clip_ratio/high_max": 0.0028039188619004562, + "clip_ratio/high_mean": 0.00101169560366543, + "clip_ratio/low_mean": 0.0009264432374038734, + "clip_ratio/low_min": 5.167424387764186e-05, + "clip_ratio/region_mean": 0.0019381388410693035, + "epoch": 8.830320699708455, + "grad_norm": 0.13666211068630219, + "learning_rate": 1e-06, + "loss": -0.0073, + "step": 857 + }, + { + "clip_ratio/high_max": 0.002292525088705588, + "clip_ratio/high_mean": 0.0009539639413560508, + "clip_ratio/low_mean": 0.0007223619941214565, + "clip_ratio/low_min": 2.9164722946006805e-05, + "clip_ratio/region_mean": 0.0016763259409344755, + "epoch": 8.839650145772595, + "grad_norm": 0.12612462043762207, + "learning_rate": 1e-06, + "loss": -0.0168, + "step": 858 + }, + { + "clip_ratio/high_max": 0.002549354379880242, + "clip_ratio/high_mean": 0.0008722816619410878, + "clip_ratio/low_mean": 0.0008769092128204647, + "clip_ratio/low_min": 3.9382481190841645e-05, + "clip_ratio/region_mean": 0.0017491908874944784, + "epoch": 8.848979591836734, + "grad_norm": 0.13609710335731506, + "learning_rate": 1e-06, + "loss": 0.0032, + "step": 859 + }, + { + "clip_ratio/high_max": 0.0024448685035167728, + "clip_ratio/high_mean": 0.0009766333550942363, + "clip_ratio/low_mean": 0.0007997267639439087, + "clip_ratio/low_min": 6.836960710643325e-05, + "clip_ratio/region_mean": 0.0017763601208571345, + "epoch": 8.858309037900874, + "grad_norm": 0.13199026882648468, + "learning_rate": 1e-06, + "loss": -0.0264, + "step": 860 + }, + { + "clip_ratio/high_max": 0.002441056858515367, + "clip_ratio/high_mean": 0.0010800373784149997, + "clip_ratio/low_mean": 0.0008245835870184237, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019046209490625188, + "epoch": 8.867638483965015, + "grad_norm": 0.14391858875751495, + "learning_rate": 1e-06, + "loss": -0.0303, + "step": 861 + }, + { + "clip_ratio/high_max": 0.0022580993027077056, + "clip_ratio/high_mean": 0.0009184454575006384, + "clip_ratio/low_mean": 0.0008429733952652896, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017614188473089598, + "epoch": 8.876967930029155, + "grad_norm": 0.13431082665920258, + "learning_rate": 1e-06, + "loss": -0.0231, + "step": 862 + }, + { + "clip_ratio/high_max": 0.0022402869908546563, + "clip_ratio/high_mean": 0.0009044435319083277, + "clip_ratio/low_mean": 0.0009824261505855247, + "clip_ratio/low_min": 6.632088843616657e-05, + "clip_ratio/region_mean": 0.0018868696497520432, + "epoch": 8.886297376093294, + "grad_norm": 0.12800784409046173, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 863 + }, + { + "clip_ratio/high_max": 0.0023474494300899096, + "clip_ratio/high_mean": 0.0009831388015300035, + "clip_ratio/low_mean": 0.0009691117556940299, + "clip_ratio/low_min": 2.5206694772350602e-05, + "clip_ratio/region_mean": 0.0019522505317581818, + "epoch": 8.895626822157434, + "grad_norm": 0.15675704181194305, + "learning_rate": 1e-06, + "loss": -0.0218, + "step": 864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0659528459821429, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4088.0, + "completions/mean_length": 784.3197631835938, + "completions/mean_terminated_length": 550.4828491210938, + "completions/min_length": 3.0, + "completions/min_terminated_length": 3.0, + "epoch": 9.00932944606414, + "grad_norm": 0.14521193504333496, + "learning_rate": 1e-06, + "loss": 0.0117, + "num_tokens": 509499180.0, + "reward": 0.641671359539032, + "reward_std": 0.16063126921653748, + "rewards/simpleverify_reward/mean": 0.6416712999343872, + "rewards/simpleverify_reward/std": 0.47951772809028625, + "step": 865 + }, + { + "clip_ratio/high_max": 0.0024364194250665605, + "clip_ratio/high_mean": 0.0009134492465818767, + "clip_ratio/low_mean": 0.00040719718981563346, + "clip_ratio/low_min": 1.4695508980366867e-05, + "clip_ratio/region_mean": 0.0013206464282120578, + "epoch": 9.018658892128279, + "grad_norm": 0.16433118283748627, + "learning_rate": 1e-06, + "loss": -0.0653, + "step": 866 + }, + { + "clip_ratio/high_max": 0.0019430474858381785, + "clip_ratio/high_mean": 0.0007149408756959019, + "clip_ratio/low_mean": 0.0004546089248833596, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001169549777841894, + "epoch": 9.02798833819242, + "grad_norm": 0.13100212812423706, + "learning_rate": 1e-06, + "loss": -0.0202, + "step": 867 + }, + { + "clip_ratio/high_max": 0.0018072276361635886, + "clip_ratio/high_mean": 0.0007499644798372174, + "clip_ratio/low_mean": 0.0005435647397007415, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001293529203394428, + "epoch": 9.03731778425656, + "grad_norm": 0.1497403234243393, + "learning_rate": 1e-06, + "loss": -0.0411, + "step": 868 + }, + { + "clip_ratio/high_max": 0.0017921896287589334, + "clip_ratio/high_mean": 0.000762457370001357, + "clip_ratio/low_mean": 0.0005329809991962975, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001295438389206538, + "epoch": 9.0466472303207, + "grad_norm": 0.1435271054506302, + "learning_rate": 1e-06, + "loss": -0.0211, + "step": 869 + }, + { + "clip_ratio/high_max": 0.002030614428804256, + "clip_ratio/high_mean": 0.000821765683213016, + "clip_ratio/low_mean": 0.000541309720119898, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013630753892357461, + "epoch": 9.055976676384839, + "grad_norm": 0.1355751007795334, + "learning_rate": 1e-06, + "loss": -0.037, + "step": 870 + }, + { + "clip_ratio/high_max": 0.002037711939919973, + "clip_ratio/high_mean": 0.000821711926619173, + "clip_ratio/low_mean": 0.0006188861989357974, + "clip_ratio/low_min": 1.001121290755691e-05, + "clip_ratio/region_mean": 0.0014405981200980023, + "epoch": 9.06530612244898, + "grad_norm": 0.1574908196926117, + "learning_rate": 1e-06, + "loss": -0.0352, + "step": 871 + }, + { + "clip_ratio/high_max": 0.0021282535781210754, + "clip_ratio/high_mean": 0.000931289270738489, + "clip_ratio/low_mean": 0.0006665466107733664, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015978358787833713, + "epoch": 9.07463556851312, + "grad_norm": 0.15857058763504028, + "learning_rate": 1e-06, + "loss": -0.002, + "step": 872 + }, + { + "clip_ratio/high_max": 0.002413314927252941, + "clip_ratio/high_mean": 0.0008469961994705955, + "clip_ratio/low_mean": 0.0008364817949768621, + "clip_ratio/low_min": 2.830615994753316e-05, + "clip_ratio/region_mean": 0.0016834779817145318, + "epoch": 9.08396501457726, + "grad_norm": 0.13699011504650116, + "learning_rate": 1e-06, + "loss": 0.0012, + "step": 873 + }, + { + "clip_ratio/high_max": 0.001854141904914286, + "clip_ratio/high_mean": 0.0007477157305402216, + "clip_ratio/low_mean": 0.0006288414078881033, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001376557171170134, + "epoch": 9.093294460641399, + "grad_norm": 0.13408535718917847, + "learning_rate": 1e-06, + "loss": -0.0174, + "step": 874 + }, + { + "clip_ratio/high_max": 0.001909519916807767, + "clip_ratio/high_mean": 0.0007421680829793331, + "clip_ratio/low_mean": 0.0007377129140877514, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001479881026170915, + "epoch": 9.102623906705539, + "grad_norm": 0.1423283964395523, + "learning_rate": 1e-06, + "loss": 0.0104, + "step": 875 + }, + { + "clip_ratio/high_max": 0.0023590355485794134, + "clip_ratio/high_mean": 0.0010331771973142168, + "clip_ratio/low_mean": 0.0006847015729363193, + "clip_ratio/low_min": 8.903273919713683e-05, + "clip_ratio/region_mean": 0.0017178787747980095, + "epoch": 9.11195335276968, + "grad_norm": 0.12830820679664612, + "learning_rate": 1e-06, + "loss": -0.0576, + "step": 876 + }, + { + "clip_ratio/high_max": 0.002145286292943638, + "clip_ratio/high_mean": 0.0009106013840209926, + "clip_ratio/low_mean": 0.000685119616719021, + "clip_ratio/low_min": 2.841599052771926e-05, + "clip_ratio/region_mean": 0.0015957209579937626, + "epoch": 9.12128279883382, + "grad_norm": 0.1287694126367569, + "learning_rate": 1e-06, + "loss": -0.0183, + "step": 877 + }, + { + "clip_ratio/high_max": 0.0021193885659158695, + "clip_ratio/high_mean": 0.0009188613312289817, + "clip_ratio/low_mean": 0.0008369850747840246, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001755846431478858, + "epoch": 9.130612244897959, + "grad_norm": 0.1833963245153427, + "learning_rate": 1e-06, + "loss": -0.0609, + "step": 878 + }, + { + "clip_ratio/high_max": 0.002296690221555764, + "clip_ratio/high_mean": 0.001003718052743352, + "clip_ratio/low_mean": 0.0008277043680209317, + "clip_ratio/low_min": 6.366182788042352e-05, + "clip_ratio/region_mean": 0.001831422410759842, + "epoch": 9.139941690962099, + "grad_norm": 0.1349063366651535, + "learning_rate": 1e-06, + "loss": -0.0221, + "step": 879 + }, + { + "clip_ratio/high_max": 0.002385257885180181, + "clip_ratio/high_mean": 0.0009333109192084521, + "clip_ratio/low_mean": 0.0007122137849364663, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016455246986879501, + "epoch": 9.14927113702624, + "grad_norm": 0.12756817042827606, + "learning_rate": 1e-06, + "loss": -0.0318, + "step": 880 + }, + { + "clip_ratio/high_max": 0.00244465890864376, + "clip_ratio/high_mean": 0.0009807757523958571, + "clip_ratio/low_mean": 0.0009915324444591533, + "clip_ratio/low_min": 1.4259639101510402e-05, + "clip_ratio/region_mean": 0.0019723081932170317, + "epoch": 9.15860058309038, + "grad_norm": 0.1390228271484375, + "learning_rate": 1e-06, + "loss": -0.0122, + "step": 881 + }, + { + "clip_ratio/high_max": 0.002297080136486329, + "clip_ratio/high_mean": 0.000955383791733766, + "clip_ratio/low_mean": 0.0009365522073494503, + "clip_ratio/low_min": 5.11177750013303e-05, + "clip_ratio/region_mean": 0.0018919359499705024, + "epoch": 9.167930029154519, + "grad_norm": 0.13791263103485107, + "learning_rate": 1e-06, + "loss": 0.0105, + "step": 882 + }, + { + "clip_ratio/high_max": 0.002606173169624526, + "clip_ratio/high_mean": 0.0011081011762144044, + "clip_ratio/low_mean": 0.0007196222613856662, + "clip_ratio/low_min": 1.628028076083865e-05, + "clip_ratio/region_mean": 0.0018277234412380494, + "epoch": 9.177259475218658, + "grad_norm": 0.13819114863872528, + "learning_rate": 1e-06, + "loss": -0.0547, + "step": 883 + }, + { + "clip_ratio/high_max": 0.002367043045524042, + "clip_ratio/high_mean": 0.0008472119334328454, + "clip_ratio/low_mean": 0.000717189993338252, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015644018567400053, + "epoch": 9.186588921282798, + "grad_norm": 0.13511182367801666, + "learning_rate": 1e-06, + "loss": -0.0331, + "step": 884 + }, + { + "clip_ratio/high_max": 0.0023582188223372214, + "clip_ratio/high_mean": 0.0009955569930752972, + "clip_ratio/low_mean": 0.000746332599192101, + "clip_ratio/low_min": 1.848292231443338e-05, + "clip_ratio/region_mean": 0.0017418895695300307, + "epoch": 9.19591836734694, + "grad_norm": 0.14253586530685425, + "learning_rate": 1e-06, + "loss": 0.0025, + "step": 885 + }, + { + "clip_ratio/high_max": 0.0023443693935405463, + "clip_ratio/high_mean": 0.0009998419973271666, + "clip_ratio/low_mean": 0.0008602960460848408, + "clip_ratio/low_min": 4.051099676871672e-05, + "clip_ratio/region_mean": 0.0018601380397740286, + "epoch": 9.205247813411079, + "grad_norm": 0.1792212277650833, + "learning_rate": 1e-06, + "loss": 0.0204, + "step": 886 + }, + { + "clip_ratio/high_max": 0.002691985217097681, + "clip_ratio/high_mean": 0.0010963495406031143, + "clip_ratio/low_mean": 0.0007340300808209577, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018303796096006408, + "epoch": 9.214577259475218, + "grad_norm": 0.12959793210029602, + "learning_rate": 1e-06, + "loss": -0.0782, + "step": 887 + }, + { + "clip_ratio/high_max": 0.0027285710093565285, + "clip_ratio/high_mean": 0.0010925341503025265, + "clip_ratio/low_mean": 0.0007487652110285126, + "clip_ratio/low_min": 4.35825659224065e-05, + "clip_ratio/region_mean": 0.0018412994031677954, + "epoch": 9.223906705539358, + "grad_norm": 0.12535342574119568, + "learning_rate": 1e-06, + "loss": -0.0286, + "step": 888 + }, + { + "clip_ratio/high_max": 0.0022187597714946605, + "clip_ratio/high_mean": 0.0009067620048881508, + "clip_ratio/low_mean": 0.000779069689087919, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001685831717622932, + "epoch": 9.2332361516035, + "grad_norm": 0.14810459315776825, + "learning_rate": 1e-06, + "loss": -0.0147, + "step": 889 + }, + { + "clip_ratio/high_max": 0.0024139151282724924, + "clip_ratio/high_mean": 0.0010855920045287348, + "clip_ratio/low_mean": 0.0008292405182146467, + "clip_ratio/low_min": 2.0226536435075104e-05, + "clip_ratio/region_mean": 0.001914832493639551, + "epoch": 9.242565597667639, + "grad_norm": 1.5517913103103638, + "learning_rate": 1e-06, + "loss": -0.0294, + "step": 890 + }, + { + "clip_ratio/high_max": 0.0022826083513791673, + "clip_ratio/high_mean": 0.0009988567380787572, + "clip_ratio/low_mean": 0.0007746484843664803, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017735052315401845, + "epoch": 9.251895043731778, + "grad_norm": 0.1243315041065216, + "learning_rate": 1e-06, + "loss": -0.0161, + "step": 891 + }, + { + "clip_ratio/high_max": 0.0024114770130836405, + "clip_ratio/high_mean": 0.0010205379367107525, + "clip_ratio/low_mean": 0.000787219390986138, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018077573186019436, + "epoch": 9.261224489795918, + "grad_norm": 0.21037563681602478, + "learning_rate": 1e-06, + "loss": -0.0489, + "step": 892 + }, + { + "clip_ratio/high_max": 0.0032194397499551997, + "clip_ratio/high_mean": 0.0011879017220053356, + "clip_ratio/low_mean": 0.0008789769035502104, + "clip_ratio/low_min": 1.4595982975151855e-05, + "clip_ratio/region_mean": 0.0020668786310125142, + "epoch": 9.270553935860057, + "grad_norm": 0.1283675879240036, + "learning_rate": 1e-06, + "loss": -0.0359, + "step": 893 + }, + { + "clip_ratio/high_max": 0.002518921763112303, + "clip_ratio/high_mean": 0.0010827663318195846, + "clip_ratio/low_mean": 0.0008738407523196656, + "clip_ratio/low_min": 7.028763320704456e-05, + "clip_ratio/region_mean": 0.0019566070841392502, + "epoch": 9.279883381924199, + "grad_norm": 0.14181050658226013, + "learning_rate": 1e-06, + "loss": -0.0145, + "step": 894 + }, + { + "clip_ratio/high_max": 0.002490412203769665, + "clip_ratio/high_mean": 0.0010082446679007262, + "clip_ratio/low_mean": 0.0009095173118112143, + "clip_ratio/low_min": 6.218976886884775e-05, + "clip_ratio/region_mean": 0.0019177619578840677, + "epoch": 9.289212827988338, + "grad_norm": 0.14314906299114227, + "learning_rate": 1e-06, + "loss": -0.0007, + "step": 895 + }, + { + "clip_ratio/high_max": 0.002657593671756331, + "clip_ratio/high_mean": 0.0010601921894703992, + "clip_ratio/low_mean": 0.0007704217950958991, + "clip_ratio/low_min": 1.8328446458326653e-05, + "clip_ratio/region_mean": 0.0018306139463675208, + "epoch": 9.298542274052478, + "grad_norm": 0.14605265855789185, + "learning_rate": 1e-06, + "loss": -0.0295, + "step": 896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0738699776785714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4082.0, + "completions/mean_length": 802.8367919921875, + "completions/mean_terminated_length": 540.16748046875, + "completions/min_length": 4.0, + "completions/min_terminated_length": 4.0, + "epoch": 9.307871720116617, + "grad_norm": 0.13294248282909393, + "learning_rate": 1e-06, + "loss": -0.0295, + "num_tokens": 526703124.0, + "reward": 0.6496233344078064, + "reward_std": 0.15484841167926788, + "rewards/simpleverify_reward/mean": 0.6496233344078064, + "rewards/simpleverify_reward/std": 0.47709622979164124, + "step": 897 + }, + { + "clip_ratio/high_max": 0.0018489369431335945, + "clip_ratio/high_mean": 0.0007859781935621868, + "clip_ratio/low_mean": 0.000501667747812462, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012876459477411117, + "epoch": 9.317201166180759, + "grad_norm": 0.15338732302188873, + "learning_rate": 1e-06, + "loss": -0.0355, + "step": 898 + }, + { + "clip_ratio/high_max": 0.0020353350846562535, + "clip_ratio/high_mean": 0.0007973251358635025, + "clip_ratio/low_mean": 0.000407775437906821, + "clip_ratio/low_min": 1.2590652659127954e-05, + "clip_ratio/region_mean": 0.0012051006087858696, + "epoch": 9.326530612244898, + "grad_norm": 0.14864759147167206, + "learning_rate": 1e-06, + "loss": -0.0295, + "step": 899 + }, + { + "clip_ratio/high_max": 0.002195424778619781, + "clip_ratio/high_mean": 0.0006893596782902023, + "clip_ratio/low_mean": 0.00047398182232427644, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011633415342657827, + "epoch": 9.335860058309038, + "grad_norm": 0.1383877396583557, + "learning_rate": 1e-06, + "loss": -0.0123, + "step": 900 + }, + { + "clip_ratio/high_max": 0.0021147289808141068, + "clip_ratio/high_mean": 0.0008393442094529746, + "clip_ratio/low_mean": 0.0005201814681186079, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013595257078122813, + "epoch": 9.345189504373177, + "grad_norm": 0.15192995965480804, + "learning_rate": 1e-06, + "loss": -0.0428, + "step": 901 + }, + { + "clip_ratio/high_max": 0.002063903808448231, + "clip_ratio/high_mean": 0.0007696333414060064, + "clip_ratio/low_mean": 0.000701427194144344, + "clip_ratio/low_min": 2.8400838345987722e-05, + "clip_ratio/region_mean": 0.001471060520998435, + "epoch": 9.354518950437317, + "grad_norm": 4.191668510437012, + "learning_rate": 1e-06, + "loss": -0.0049, + "step": 902 + }, + { + "clip_ratio/high_max": 0.0025516079913359135, + "clip_ratio/high_mean": 0.0009315781180703198, + "clip_ratio/low_mean": 0.0005230867236605263, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014546648308169097, + "epoch": 9.363848396501458, + "grad_norm": 0.13582298159599304, + "learning_rate": 1e-06, + "loss": -0.0609, + "step": 903 + }, + { + "clip_ratio/high_max": 0.001924297415826004, + "clip_ratio/high_mean": 0.0007267159489856567, + "clip_ratio/low_mean": 0.000714097479431075, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014408134011318907, + "epoch": 9.373177842565598, + "grad_norm": 0.17259620130062103, + "learning_rate": 1e-06, + "loss": 0.0305, + "step": 904 + }, + { + "clip_ratio/high_max": 0.002297422422998352, + "clip_ratio/high_mean": 0.0010069530871987808, + "clip_ratio/low_mean": 0.0005643024314849754, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015712555396021344, + "epoch": 9.382507288629737, + "grad_norm": 0.13771282136440277, + "learning_rate": 1e-06, + "loss": -0.02, + "step": 905 + }, + { + "clip_ratio/high_max": 0.0020101481750316452, + "clip_ratio/high_mean": 0.0009243096374120796, + "clip_ratio/low_mean": 0.0007376313924396527, + "clip_ratio/low_min": 4.073871787113603e-05, + "clip_ratio/region_mean": 0.001661941030761227, + "epoch": 9.391836734693877, + "grad_norm": 0.15268857777118683, + "learning_rate": 1e-06, + "loss": -0.0167, + "step": 906 + }, + { + "clip_ratio/high_max": 0.0024281362602778245, + "clip_ratio/high_mean": 0.000886371630258509, + "clip_ratio/low_mean": 0.0006705998439429095, + "clip_ratio/low_min": 1.3196790860092733e-05, + "clip_ratio/region_mean": 0.0015569714887533337, + "epoch": 9.401166180758018, + "grad_norm": 0.14203545451164246, + "learning_rate": 1e-06, + "loss": -0.0279, + "step": 907 + }, + { + "clip_ratio/high_max": 0.002260651883261744, + "clip_ratio/high_mean": 0.0009041009507200215, + "clip_ratio/low_mean": 0.0008411090529989451, + "clip_ratio/low_min": 2.9925784474471584e-05, + "clip_ratio/region_mean": 0.001745210014632903, + "epoch": 9.410495626822158, + "grad_norm": 0.14670445024967194, + "learning_rate": 1e-06, + "loss": -0.0201, + "step": 908 + }, + { + "clip_ratio/high_max": 0.0028523376568045933, + "clip_ratio/high_mean": 0.0010644993308233097, + "clip_ratio/low_mean": 0.0006402868766599568, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017047862493200228, + "epoch": 9.419825072886297, + "grad_norm": 0.32806357741355896, + "learning_rate": 1e-06, + "loss": -0.0364, + "step": 909 + }, + { + "clip_ratio/high_max": 0.0020576808783516753, + "clip_ratio/high_mean": 0.0008409076972384355, + "clip_ratio/low_mean": 0.0005672940469594323, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014082017260079738, + "epoch": 9.429154518950437, + "grad_norm": 0.13929541409015656, + "learning_rate": 1e-06, + "loss": -0.0455, + "step": 910 + }, + { + "clip_ratio/high_max": 0.002748642309597926, + "clip_ratio/high_mean": 0.0010775229129649233, + "clip_ratio/low_mean": 0.0007187987357610837, + "clip_ratio/low_min": 2.8682881747954525e-05, + "clip_ratio/region_mean": 0.00179632160870824, + "epoch": 9.438483965014576, + "grad_norm": 0.14641906321048737, + "learning_rate": 1e-06, + "loss": -0.0461, + "step": 911 + }, + { + "clip_ratio/high_max": 0.00220919926505303, + "clip_ratio/high_mean": 0.0008464168568025343, + "clip_ratio/low_mean": 0.0008198327122954652, + "clip_ratio/low_min": 4.1326868085889146e-05, + "clip_ratio/region_mean": 0.001666249580011936, + "epoch": 9.447813411078718, + "grad_norm": 0.13365840911865234, + "learning_rate": 1e-06, + "loss": -0.0027, + "step": 912 + }, + { + "clip_ratio/high_max": 0.0023387129294860642, + "clip_ratio/high_mean": 0.0008236713329097256, + "clip_ratio/low_mean": 0.0007478946163246292, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015715659501438495, + "epoch": 9.457142857142857, + "grad_norm": 0.1388624608516693, + "learning_rate": 1e-06, + "loss": -0.0206, + "step": 913 + }, + { + "clip_ratio/high_max": 0.002195760105678346, + "clip_ratio/high_mean": 0.0009363750432385132, + "clip_ratio/low_mean": 0.000804728825642087, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017411038934369572, + "epoch": 9.466472303206997, + "grad_norm": 0.37228792905807495, + "learning_rate": 1e-06, + "loss": 0.0032, + "step": 914 + }, + { + "clip_ratio/high_max": 0.0021250151621643454, + "clip_ratio/high_mean": 0.0009652398130128859, + "clip_ratio/low_mean": 0.0007873637578086345, + "clip_ratio/low_min": 2.9377285500231665e-05, + "clip_ratio/region_mean": 0.0017526035226183012, + "epoch": 9.475801749271136, + "grad_norm": 0.1489114910364151, + "learning_rate": 1e-06, + "loss": -0.0593, + "step": 915 + }, + { + "clip_ratio/high_max": 0.002142500175978057, + "clip_ratio/high_mean": 0.000912011008040281, + "clip_ratio/low_mean": 0.0008255670236394508, + "clip_ratio/low_min": 9.25621934584342e-05, + "clip_ratio/region_mean": 0.0017375780080328695, + "epoch": 9.485131195335278, + "grad_norm": 0.12724773585796356, + "learning_rate": 1e-06, + "loss": -0.0462, + "step": 916 + }, + { + "clip_ratio/high_max": 0.002542140573495999, + "clip_ratio/high_mean": 0.0009185514627461089, + "clip_ratio/low_mean": 0.0008871549835021142, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018057064444292337, + "epoch": 9.494460641399417, + "grad_norm": 0.14874504506587982, + "learning_rate": 1e-06, + "loss": 0.0173, + "step": 917 + }, + { + "clip_ratio/high_max": 0.0026165570234297775, + "clip_ratio/high_mean": 0.0009514420962659642, + "clip_ratio/low_mean": 0.0009032052030306659, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018546472638263367, + "epoch": 9.503790087463557, + "grad_norm": 0.26071786880493164, + "learning_rate": 1e-06, + "loss": -0.0016, + "step": 918 + }, + { + "clip_ratio/high_max": 0.002320240644621663, + "clip_ratio/high_mean": 0.000963028876867611, + "clip_ratio/low_mean": 0.0008858062192302896, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018488350833649747, + "epoch": 9.513119533527696, + "grad_norm": 0.1486770063638687, + "learning_rate": 1e-06, + "loss": -0.0353, + "step": 919 + }, + { + "clip_ratio/high_max": 0.0023801244315109216, + "clip_ratio/high_mean": 0.0009178674863505876, + "clip_ratio/low_mean": 0.0009687192214187235, + "clip_ratio/low_min": 5.618819341179915e-05, + "clip_ratio/region_mean": 0.0018865866877604276, + "epoch": 9.522448979591836, + "grad_norm": 0.1972588300704956, + "learning_rate": 1e-06, + "loss": -0.0036, + "step": 920 + }, + { + "clip_ratio/high_max": 0.0024615703368908726, + "clip_ratio/high_mean": 0.0010931789802270941, + "clip_ratio/low_mean": 0.0009637643106543692, + "clip_ratio/low_min": 4.1408433389733545e-05, + "clip_ratio/region_mean": 0.002056943289062474, + "epoch": 9.531778425655977, + "grad_norm": 0.15567514300346375, + "learning_rate": 1e-06, + "loss": -0.0805, + "step": 921 + }, + { + "clip_ratio/high_max": 0.0024112346836773213, + "clip_ratio/high_mean": 0.0008924721605580999, + "clip_ratio/low_mean": 0.000902465867511637, + "clip_ratio/low_min": 4.443024590727873e-05, + "clip_ratio/region_mean": 0.0017949380198842846, + "epoch": 9.541107871720117, + "grad_norm": 0.1386972814798355, + "learning_rate": 1e-06, + "loss": -0.0034, + "step": 922 + }, + { + "clip_ratio/high_max": 0.0026421247457619756, + "clip_ratio/high_mean": 0.001152587039541686, + "clip_ratio/low_mean": 0.0009233891105395742, + "clip_ratio/low_min": 1.9869654352078214e-05, + "clip_ratio/region_mean": 0.0020759761391673237, + "epoch": 9.550437317784256, + "grad_norm": 0.1365169733762741, + "learning_rate": 1e-06, + "loss": -0.0408, + "step": 923 + }, + { + "clip_ratio/high_max": 0.0023661662999074906, + "clip_ratio/high_mean": 0.0009867211047094315, + "clip_ratio/low_mean": 0.0011236081500101136, + "clip_ratio/low_min": 3.420908615225926e-05, + "clip_ratio/region_mean": 0.002110329245624598, + "epoch": 9.559766763848396, + "grad_norm": 0.14760929346084595, + "learning_rate": 1e-06, + "loss": -0.0201, + "step": 924 + }, + { + "clip_ratio/high_max": 0.002754248365818057, + "clip_ratio/high_mean": 0.0012121745976401144, + "clip_ratio/low_mean": 0.0009409423691977281, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021531169550144114, + "epoch": 9.569096209912537, + "grad_norm": 0.14264555275440216, + "learning_rate": 1e-06, + "loss": -0.0666, + "step": 925 + }, + { + "clip_ratio/high_max": 0.003279394790297374, + "clip_ratio/high_mean": 0.001171923639049055, + "clip_ratio/low_mean": 0.0010396044708613772, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0022115280662546866, + "epoch": 9.578425655976677, + "grad_norm": 0.1504513919353485, + "learning_rate": 1e-06, + "loss": 0.0008, + "step": 926 + }, + { + "clip_ratio/high_max": 0.002248778815555852, + "clip_ratio/high_mean": 0.0009007656153698917, + "clip_ratio/low_mean": 0.0009944847370206844, + "clip_ratio/low_min": 4.9996930101769976e-05, + "clip_ratio/region_mean": 0.001895250337838661, + "epoch": 9.587755102040816, + "grad_norm": 0.14505545794963837, + "learning_rate": 1e-06, + "loss": 0.0217, + "step": 927 + }, + { + "clip_ratio/high_max": 0.002946940003312193, + "clip_ratio/high_mean": 0.0011926191473321524, + "clip_ratio/low_mean": 0.0010505889367777854, + "clip_ratio/low_min": 7.204565736174118e-05, + "clip_ratio/region_mean": 0.0022432081241277047, + "epoch": 9.597084548104956, + "grad_norm": 0.13531829416751862, + "learning_rate": 1e-06, + "loss": -0.0584, + "step": 928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0738699776785714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4075.0, + "completions/mean_length": 805.7715454101562, + "completions/mean_terminated_length": 543.3363037109375, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 9.606413994169095, + "grad_norm": 0.14112375676631927, + "learning_rate": 1e-06, + "loss": -0.0645, + "num_tokens": 543967325.0, + "reward": 0.6425432562828064, + "reward_std": 0.15433722734451294, + "rewards/simpleverify_reward/mean": 0.6425432562828064, + "rewards/simpleverify_reward/std": 0.4792592525482178, + "step": 929 + }, + { + "clip_ratio/high_max": 0.0020987102761864662, + "clip_ratio/high_mean": 0.0007994007464731112, + "clip_ratio/low_mean": 0.00048655450063961325, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012859552480222192, + "epoch": 9.615743440233237, + "grad_norm": 0.15093941986560822, + "learning_rate": 1e-06, + "loss": -0.0253, + "step": 930 + }, + { + "clip_ratio/high_max": 0.0018653494771569967, + "clip_ratio/high_mean": 0.0007498073318856768, + "clip_ratio/low_mean": 0.00040532318053010385, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001155130523329717, + "epoch": 9.625072886297376, + "grad_norm": 0.14469389617443085, + "learning_rate": 1e-06, + "loss": -0.0605, + "step": 931 + }, + { + "clip_ratio/high_max": 0.0017922082042787224, + "clip_ratio/high_mean": 0.0006247796482057311, + "clip_ratio/low_mean": 0.00044246219249544083, + "clip_ratio/low_min": 1.3423539712675847e-05, + "clip_ratio/region_mean": 0.0010672418247850146, + "epoch": 9.634402332361516, + "grad_norm": 0.13388541340827942, + "learning_rate": 1e-06, + "loss": 0.0017, + "step": 932 + }, + { + "clip_ratio/high_max": 0.0019075371528742835, + "clip_ratio/high_mean": 0.0007945345150801586, + "clip_ratio/low_mean": 0.0005729380218326696, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00136747251599445, + "epoch": 9.643731778425655, + "grad_norm": 0.13685829937458038, + "learning_rate": 1e-06, + "loss": -0.0106, + "step": 933 + }, + { + "clip_ratio/high_max": 0.0017625001273700036, + "clip_ratio/high_mean": 0.0006951841442059958, + "clip_ratio/low_mean": 0.0005532228292395303, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012484070102800615, + "epoch": 9.653061224489797, + "grad_norm": 0.13377708196640015, + "learning_rate": 1e-06, + "loss": -0.0358, + "step": 934 + }, + { + "clip_ratio/high_max": 0.0017062682309187949, + "clip_ratio/high_mean": 0.0006445129001804162, + "clip_ratio/low_mean": 0.0006428167789636063, + "clip_ratio/low_min": 3.075030690524727e-05, + "clip_ratio/region_mean": 0.0012873296909674536, + "epoch": 9.662390670553936, + "grad_norm": 0.1544046550989151, + "learning_rate": 1e-06, + "loss": 0.0065, + "step": 935 + }, + { + "clip_ratio/high_max": 0.0021471931104315445, + "clip_ratio/high_mean": 0.0008416242453677114, + "clip_ratio/low_mean": 0.0005645988157993997, + "clip_ratio/low_min": 1.0792608918563928e-05, + "clip_ratio/region_mean": 0.0014062230366107542, + "epoch": 9.671720116618076, + "grad_norm": 0.1447170525789261, + "learning_rate": 1e-06, + "loss": 0.0034, + "step": 936 + }, + { + "clip_ratio/high_max": 0.002280427162986598, + "clip_ratio/high_mean": 0.0007694852911299677, + "clip_ratio/low_mean": 0.0006271884849411435, + "clip_ratio/low_min": 4.1026994949788786e-05, + "clip_ratio/region_mean": 0.0013966737569717225, + "epoch": 9.681049562682215, + "grad_norm": 0.14356733858585358, + "learning_rate": 1e-06, + "loss": -0.0507, + "step": 937 + }, + { + "clip_ratio/high_max": 0.0023910022500786, + "clip_ratio/high_mean": 0.0008914276631912799, + "clip_ratio/low_mean": 0.0006133094175311271, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001504737083450891, + "epoch": 9.690379008746355, + "grad_norm": 0.13298338651657104, + "learning_rate": 1e-06, + "loss": -0.0472, + "step": 938 + }, + { + "clip_ratio/high_max": 0.0021206866440479644, + "clip_ratio/high_mean": 0.000876473819516832, + "clip_ratio/low_mean": 0.0006826361877756426, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015591100382152945, + "epoch": 9.699708454810496, + "grad_norm": 0.14761479198932648, + "learning_rate": 1e-06, + "loss": -0.0333, + "step": 939 + }, + { + "clip_ratio/high_max": 0.0022532403490913566, + "clip_ratio/high_mean": 0.0008637648279545829, + "clip_ratio/low_mean": 0.0007729321687293123, + "clip_ratio/low_min": 3.0113225875538774e-05, + "clip_ratio/region_mean": 0.0016366969648515806, + "epoch": 9.709037900874636, + "grad_norm": 0.14637258648872375, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 940 + }, + { + "clip_ratio/high_max": 0.0019963298182119615, + "clip_ratio/high_mean": 0.0007406541462842142, + "clip_ratio/low_mean": 0.0006486438278443529, + "clip_ratio/low_min": 1.6120711734401993e-05, + "clip_ratio/region_mean": 0.0013892979332013056, + "epoch": 9.718367346938775, + "grad_norm": 0.1345418095588684, + "learning_rate": 1e-06, + "loss": -0.0168, + "step": 941 + }, + { + "clip_ratio/high_max": 0.0027508915663929656, + "clip_ratio/high_mean": 0.0010285658572684042, + "clip_ratio/low_mean": 0.0005490175335580716, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015775834253872745, + "epoch": 9.727696793002915, + "grad_norm": 0.12532925605773926, + "learning_rate": 1e-06, + "loss": -0.0575, + "step": 942 + }, + { + "clip_ratio/high_max": 0.002239793619082775, + "clip_ratio/high_mean": 0.0008860702728270553, + "clip_ratio/low_mean": 0.0006159189852041891, + "clip_ratio/low_min": 1.7322616258752532e-05, + "clip_ratio/region_mean": 0.001501989238022361, + "epoch": 9.737026239067056, + "grad_norm": 0.12550191581249237, + "learning_rate": 1e-06, + "loss": -0.0662, + "step": 943 + }, + { + "clip_ratio/high_max": 0.002336008237762144, + "clip_ratio/high_mean": 0.0008461864526907448, + "clip_ratio/low_mean": 0.0008429012323176721, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016890876649995334, + "epoch": 9.746355685131196, + "grad_norm": 0.14688219130039215, + "learning_rate": 1e-06, + "loss": 0.0095, + "step": 944 + }, + { + "clip_ratio/high_max": 0.0022440484863182064, + "clip_ratio/high_mean": 0.0008293414812214905, + "clip_ratio/low_mean": 0.0007935643543532933, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016229058164753951, + "epoch": 9.755685131195335, + "grad_norm": 0.1360652595758438, + "learning_rate": 1e-06, + "loss": -0.0094, + "step": 945 + }, + { + "clip_ratio/high_max": 0.0023962190389283933, + "clip_ratio/high_mean": 0.0009036373121489305, + "clip_ratio/low_mean": 0.0007466155984729994, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016502528815180995, + "epoch": 9.765014577259475, + "grad_norm": 0.14179359376430511, + "learning_rate": 1e-06, + "loss": -0.048, + "step": 946 + }, + { + "clip_ratio/high_max": 0.0026654254470486194, + "clip_ratio/high_mean": 0.0010938354153040564, + "clip_ratio/low_mean": 0.0008020839327400608, + "clip_ratio/low_min": 4.934125900035724e-05, + "clip_ratio/region_mean": 0.0018959193766932003, + "epoch": 9.774344023323614, + "grad_norm": 0.1481010466814041, + "learning_rate": 1e-06, + "loss": -0.0376, + "step": 947 + }, + { + "clip_ratio/high_max": 0.0025731913483468816, + "clip_ratio/high_mean": 0.0009571427381160902, + "clip_ratio/low_mean": 0.0008804390326986322, + "clip_ratio/low_min": 4.955558506480884e-05, + "clip_ratio/region_mean": 0.0018375817744527012, + "epoch": 9.783673469387756, + "grad_norm": 0.17433428764343262, + "learning_rate": 1e-06, + "loss": 0.0002, + "step": 948 + }, + { + "clip_ratio/high_max": 0.0022778754209866747, + "clip_ratio/high_mean": 0.0009427083605260123, + "clip_ratio/low_mean": 0.0006592823738174047, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016019907197915018, + "epoch": 9.793002915451895, + "grad_norm": 0.11593896895647049, + "learning_rate": 1e-06, + "loss": -0.079, + "step": 949 + }, + { + "clip_ratio/high_max": 0.002450108717312105, + "clip_ratio/high_mean": 0.000996564293927804, + "clip_ratio/low_mean": 0.0009397516132594319, + "clip_ratio/low_min": 3.1743858016852755e-05, + "clip_ratio/region_mean": 0.0019363159808563069, + "epoch": 9.802332361516035, + "grad_norm": 0.1490112692117691, + "learning_rate": 1e-06, + "loss": -0.0053, + "step": 950 + }, + { + "clip_ratio/high_max": 0.002661286336660851, + "clip_ratio/high_mean": 0.0010696067693061195, + "clip_ratio/low_mean": 0.0009575898402545135, + "clip_ratio/low_min": 2.5826446290011518e-05, + "clip_ratio/region_mean": 0.0020271966495784, + "epoch": 9.811661807580174, + "grad_norm": 0.14619016647338867, + "learning_rate": 1e-06, + "loss": -0.0569, + "step": 951 + }, + { + "clip_ratio/high_max": 0.002604166518722195, + "clip_ratio/high_mean": 0.001022584103338886, + "clip_ratio/low_mean": 0.000799032362010621, + "clip_ratio/low_min": 1.5574383724015206e-05, + "clip_ratio/region_mean": 0.0018216164826299064, + "epoch": 9.820991253644316, + "grad_norm": 0.14257757365703583, + "learning_rate": 1e-06, + "loss": -0.0578, + "step": 952 + }, + { + "clip_ratio/high_max": 0.0025133486633421853, + "clip_ratio/high_mean": 0.0009504987392574549, + "clip_ratio/low_mean": 0.0008947350024754996, + "clip_ratio/low_min": 4.656071541830897e-05, + "clip_ratio/region_mean": 0.0018452337681083009, + "epoch": 9.830320699708455, + "grad_norm": 0.13759483397006989, + "learning_rate": 1e-06, + "loss": -0.0167, + "step": 953 + }, + { + "clip_ratio/high_max": 0.0026442376256454736, + "clip_ratio/high_mean": 0.0010522497577767354, + "clip_ratio/low_mean": 0.0007457789815816795, + "clip_ratio/low_min": 4.582609744829824e-05, + "clip_ratio/region_mean": 0.0017980287229875103, + "epoch": 9.839650145772595, + "grad_norm": 0.149187371134758, + "learning_rate": 1e-06, + "loss": -0.055, + "step": 954 + }, + { + "clip_ratio/high_max": 0.0023868798598414287, + "clip_ratio/high_mean": 0.0009610945671738591, + "clip_ratio/low_mean": 0.000800088853793568, + "clip_ratio/low_min": 3.902185198967345e-05, + "clip_ratio/region_mean": 0.00176118341187248, + "epoch": 9.848979591836734, + "grad_norm": 0.13883022964000702, + "learning_rate": 1e-06, + "loss": -0.0438, + "step": 955 + }, + { + "clip_ratio/high_max": 0.0019565389593481086, + "clip_ratio/high_mean": 0.000855520960612921, + "clip_ratio/low_mean": 0.000916341752599692, + "clip_ratio/low_min": 1.6783029423095286e-05, + "clip_ratio/region_mean": 0.0017718627204885706, + "epoch": 9.858309037900874, + "grad_norm": 0.1580863744020462, + "learning_rate": 1e-06, + "loss": 0.0115, + "step": 956 + }, + { + "clip_ratio/high_max": 0.0021067335655970965, + "clip_ratio/high_mean": 0.000857573374560161, + "clip_ratio/low_mean": 0.0008684557051310549, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001726029055134859, + "epoch": 9.867638483965015, + "grad_norm": 0.14364555478096008, + "learning_rate": 1e-06, + "loss": -0.0197, + "step": 957 + }, + { + "clip_ratio/high_max": 0.002229661498859059, + "clip_ratio/high_mean": 0.0009492823483014945, + "clip_ratio/low_mean": 0.0008248334534073365, + "clip_ratio/low_min": 6.832440703874454e-05, + "clip_ratio/region_mean": 0.0017741157826094422, + "epoch": 9.876967930029155, + "grad_norm": 0.13506869971752167, + "learning_rate": 1e-06, + "loss": -0.0274, + "step": 958 + }, + { + "clip_ratio/high_max": 0.0022754226156393997, + "clip_ratio/high_mean": 0.0010122565799974836, + "clip_ratio/low_mean": 0.000683598967953003, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016958555206656456, + "epoch": 9.886297376093294, + "grad_norm": 0.14167319238185883, + "learning_rate": 1e-06, + "loss": -0.0589, + "step": 959 + }, + { + "clip_ratio/high_max": 0.002559137537900824, + "clip_ratio/high_mean": 0.001125937244069064, + "clip_ratio/low_mean": 0.0008741367091715802, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020000739677925594, + "epoch": 9.895626822157434, + "grad_norm": 0.13148806989192963, + "learning_rate": 1e-06, + "loss": -0.0495, + "step": 960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0765904017857143, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4091.0, + "completions/mean_length": 815.6360473632812, + "completions/mean_terminated_length": 543.5526733398438, + "completions/min_length": 25.0, + "completions/min_terminated_length": 25.0, + "epoch": 10.00932944606414, + "grad_norm": 0.1500326693058014, + "learning_rate": 1e-06, + "loss": -0.0043, + "num_tokens": 561264962.0, + "reward": 0.6439732313156128, + "reward_std": 0.15593308210372925, + "rewards/simpleverify_reward/mean": 0.6439732313156128, + "rewards/simpleverify_reward/std": 0.47883158922195435, + "step": 961 + }, + { + "clip_ratio/high_max": 0.001909110196720576, + "clip_ratio/high_mean": 0.0006390837424987694, + "clip_ratio/low_mean": 0.00045998847508599283, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010990722221322358, + "epoch": 10.018658892128279, + "grad_norm": 0.12861394882202148, + "learning_rate": 1e-06, + "loss": -0.0153, + "step": 962 + }, + { + "clip_ratio/high_max": 0.001726972492178902, + "clip_ratio/high_mean": 0.0006959185730011086, + "clip_ratio/low_mean": 0.0004754106967084226, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00117132927334751, + "epoch": 10.02798833819242, + "grad_norm": 0.14925359189510345, + "learning_rate": 1e-06, + "loss": -0.0361, + "step": 963 + }, + { + "clip_ratio/high_max": 0.0020835115428781137, + "clip_ratio/high_mean": 0.0007047521248750854, + "clip_ratio/low_mean": 0.00050725807705021, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012120101964683272, + "epoch": 10.03731778425656, + "grad_norm": 0.1678958386182785, + "learning_rate": 1e-06, + "loss": -0.0414, + "step": 964 + }, + { + "clip_ratio/high_max": 0.0018111032077285927, + "clip_ratio/high_mean": 0.0007327352122956654, + "clip_ratio/low_mean": 0.0005728224514314206, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013055576564511284, + "epoch": 10.0466472303207, + "grad_norm": 0.17154543101787567, + "learning_rate": 1e-06, + "loss": -0.0272, + "step": 965 + }, + { + "clip_ratio/high_max": 0.0022328296727209818, + "clip_ratio/high_mean": 0.0007948934726300649, + "clip_ratio/low_mean": 0.0005207437361605116, + "clip_ratio/low_min": 1.6356974811060354e-05, + "clip_ratio/region_mean": 0.001315637222432997, + "epoch": 10.055976676384839, + "grad_norm": 0.14976218342781067, + "learning_rate": 1e-06, + "loss": -0.0237, + "step": 966 + }, + { + "clip_ratio/high_max": 0.0021570081589743495, + "clip_ratio/high_mean": 0.0008220616673497716, + "clip_ratio/low_mean": 0.0005323067780409474, + "clip_ratio/low_min": 1.504573901911499e-05, + "clip_ratio/region_mean": 0.0013543684217438567, + "epoch": 10.06530612244898, + "grad_norm": 0.1393507421016693, + "learning_rate": 1e-06, + "loss": -0.0162, + "step": 967 + }, + { + "clip_ratio/high_max": 0.0016665338625898585, + "clip_ratio/high_mean": 0.0006571479025296867, + "clip_ratio/low_mean": 0.0006993826282268856, + "clip_ratio/low_min": 3.510578608256765e-05, + "clip_ratio/region_mean": 0.0013565305198426358, + "epoch": 10.07463556851312, + "grad_norm": 0.14176590740680695, + "learning_rate": 1e-06, + "loss": -0.0061, + "step": 968 + }, + { + "clip_ratio/high_max": 0.0020020450137963053, + "clip_ratio/high_mean": 0.0007357875847446849, + "clip_ratio/low_mean": 0.0006004850783938309, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013362726676859893, + "epoch": 10.08396501457726, + "grad_norm": 0.13857552409172058, + "learning_rate": 1e-06, + "loss": 0.0088, + "step": 969 + }, + { + "clip_ratio/high_max": 0.0020954376086592674, + "clip_ratio/high_mean": 0.0008055659618548816, + "clip_ratio/low_mean": 0.0006883723835926503, + "clip_ratio/low_min": 4.625426026905188e-05, + "clip_ratio/region_mean": 0.0014939383763703518, + "epoch": 10.093294460641399, + "grad_norm": 0.13805639743804932, + "learning_rate": 1e-06, + "loss": -0.0118, + "step": 970 + }, + { + "clip_ratio/high_max": 0.0020044581760885194, + "clip_ratio/high_mean": 0.0008033949143282371, + "clip_ratio/low_mean": 0.0006440322326852765, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014474271447397768, + "epoch": 10.102623906705539, + "grad_norm": 0.15415208041667938, + "learning_rate": 1e-06, + "loss": -0.0237, + "step": 971 + }, + { + "clip_ratio/high_max": 0.0021812322447658516, + "clip_ratio/high_mean": 0.0008289018569485052, + "clip_ratio/low_mean": 0.00048655639693606645, + "clip_ratio/low_min": 1.7189218851854093e-05, + "clip_ratio/region_mean": 0.0013154582702554762, + "epoch": 10.11195335276968, + "grad_norm": 0.14797736704349518, + "learning_rate": 1e-06, + "loss": -0.0567, + "step": 972 + }, + { + "clip_ratio/high_max": 0.0020825552601309028, + "clip_ratio/high_mean": 0.0008909320931707043, + "clip_ratio/low_mean": 0.0007273344635905232, + "clip_ratio/low_min": 1.7610595023143105e-05, + "clip_ratio/region_mean": 0.0016182665895030368, + "epoch": 10.12128279883382, + "grad_norm": 0.15112736821174622, + "learning_rate": 1e-06, + "loss": -0.047, + "step": 973 + }, + { + "clip_ratio/high_max": 0.00220344615081558, + "clip_ratio/high_mean": 0.0009626862138247816, + "clip_ratio/low_mean": 0.0007337629031098913, + "clip_ratio/low_min": 1.0976466910506133e-05, + "clip_ratio/region_mean": 0.001696449086011853, + "epoch": 10.130612244897959, + "grad_norm": 0.1311543732881546, + "learning_rate": 1e-06, + "loss": -0.0352, + "step": 974 + }, + { + "clip_ratio/high_max": 0.002281056142237503, + "clip_ratio/high_mean": 0.0008180264849215746, + "clip_ratio/low_mean": 0.0008300371300720144, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001648063614993589, + "epoch": 10.139941690962099, + "grad_norm": 0.15632712841033936, + "learning_rate": 1e-06, + "loss": 0.0079, + "step": 975 + }, + { + "clip_ratio/high_max": 0.0021112567228556145, + "clip_ratio/high_mean": 0.0008205298727261834, + "clip_ratio/low_mean": 0.0007860472078391467, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016065770796558354, + "epoch": 10.14927113702624, + "grad_norm": 0.1610146015882492, + "learning_rate": 1e-06, + "loss": 0.0208, + "step": 976 + }, + { + "clip_ratio/high_max": 0.002420800185063854, + "clip_ratio/high_mean": 0.0010541762094362639, + "clip_ratio/low_mean": 0.000777995390308206, + "clip_ratio/low_min": 3.944130912714172e-05, + "clip_ratio/region_mean": 0.0018321715797355864, + "epoch": 10.15860058309038, + "grad_norm": 0.15158908069133759, + "learning_rate": 1e-06, + "loss": -0.0482, + "step": 977 + }, + { + "clip_ratio/high_max": 0.0025289334371336736, + "clip_ratio/high_mean": 0.001032315978591214, + "clip_ratio/low_mean": 0.0007892725134297507, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018215884956589434, + "epoch": 10.167930029154519, + "grad_norm": 0.1517321765422821, + "learning_rate": 1e-06, + "loss": -0.0449, + "step": 978 + }, + { + "clip_ratio/high_max": 0.00251186663081171, + "clip_ratio/high_mean": 0.0009472684323554859, + "clip_ratio/low_mean": 0.0010198395466431975, + "clip_ratio/low_min": 6.164200749481097e-05, + "clip_ratio/region_mean": 0.001967107964446768, + "epoch": 10.177259475218658, + "grad_norm": 0.17969931662082672, + "learning_rate": 1e-06, + "loss": -0.023, + "step": 979 + }, + { + "clip_ratio/high_max": 0.0024426607706118375, + "clip_ratio/high_mean": 0.0010387922484369483, + "clip_ratio/low_mean": 0.0008045482863963116, + "clip_ratio/low_min": 1.3954007954453118e-05, + "clip_ratio/region_mean": 0.0018433404984534718, + "epoch": 10.186588921282798, + "grad_norm": 0.13014066219329834, + "learning_rate": 1e-06, + "loss": -0.036, + "step": 980 + }, + { + "clip_ratio/high_max": 0.0023669466099818237, + "clip_ratio/high_mean": 0.0009498952094872948, + "clip_ratio/low_mean": 0.0007533242551289732, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017032194300554693, + "epoch": 10.19591836734694, + "grad_norm": 0.140021413564682, + "learning_rate": 1e-06, + "loss": -0.0382, + "step": 981 + }, + { + "clip_ratio/high_max": 0.0024754825062700547, + "clip_ratio/high_mean": 0.0010548727586865425, + "clip_ratio/low_mean": 0.000770688675402198, + "clip_ratio/low_min": 4.954419819114264e-05, + "clip_ratio/region_mean": 0.0018255614413646981, + "epoch": 10.205247813411079, + "grad_norm": 0.16048754751682281, + "learning_rate": 1e-06, + "loss": -0.0356, + "step": 982 + }, + { + "clip_ratio/high_max": 0.0026544872162048705, + "clip_ratio/high_mean": 0.0011467996628198307, + "clip_ratio/low_mean": 0.0007798313927196432, + "clip_ratio/low_min": 4.544352850643918e-05, + "clip_ratio/region_mean": 0.0019266310628154315, + "epoch": 10.214577259475218, + "grad_norm": 0.14217230677604675, + "learning_rate": 1e-06, + "loss": -0.103, + "step": 983 + }, + { + "clip_ratio/high_max": 0.002223150913778227, + "clip_ratio/high_mean": 0.0009643569683248643, + "clip_ratio/low_mean": 0.0008032557434489718, + "clip_ratio/low_min": 6.634566125285346e-05, + "clip_ratio/region_mean": 0.0017676126954029314, + "epoch": 10.223906705539358, + "grad_norm": 0.14624422788619995, + "learning_rate": 1e-06, + "loss": -0.0517, + "step": 984 + }, + { + "clip_ratio/high_max": 0.0025765645259525627, + "clip_ratio/high_mean": 0.0010526941732678097, + "clip_ratio/low_mean": 0.0008813243184704334, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001934018473548349, + "epoch": 10.2332361516035, + "grad_norm": 0.14733164012432098, + "learning_rate": 1e-06, + "loss": -0.0382, + "step": 985 + }, + { + "clip_ratio/high_max": 0.0025515191155136563, + "clip_ratio/high_mean": 0.0010626111306919483, + "clip_ratio/low_mean": 0.0009340102224086877, + "clip_ratio/low_min": 4.5101674913894385e-05, + "clip_ratio/region_mean": 0.0019966213731095195, + "epoch": 10.242565597667639, + "grad_norm": 0.146484836935997, + "learning_rate": 1e-06, + "loss": -0.0013, + "step": 986 + }, + { + "clip_ratio/high_max": 0.002395473995420616, + "clip_ratio/high_mean": 0.0009680404637038009, + "clip_ratio/low_mean": 0.0007183443294707104, + "clip_ratio/low_min": 1.2942638022650499e-05, + "clip_ratio/region_mean": 0.0016863847995409742, + "epoch": 10.251895043731778, + "grad_norm": 0.14619509875774384, + "learning_rate": 1e-06, + "loss": -0.067, + "step": 987 + }, + { + "clip_ratio/high_max": 0.0019169974184478633, + "clip_ratio/high_mean": 0.0008501538814016385, + "clip_ratio/low_mean": 0.0009479271257077926, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001798080986191053, + "epoch": 10.261224489795918, + "grad_norm": 0.13349026441574097, + "learning_rate": 1e-06, + "loss": -0.0003, + "step": 988 + }, + { + "clip_ratio/high_max": 0.002456573558447417, + "clip_ratio/high_mean": 0.0011083521458203904, + "clip_ratio/low_mean": 0.0006360037540389385, + "clip_ratio/low_min": 1.4082920642977115e-05, + "clip_ratio/region_mean": 0.0017443558826926164, + "epoch": 10.270553935860057, + "grad_norm": 0.1264098435640335, + "learning_rate": 1e-06, + "loss": -0.1013, + "step": 989 + }, + { + "clip_ratio/high_max": 0.002665226122189779, + "clip_ratio/high_mean": 0.001071601793228183, + "clip_ratio/low_mean": 0.001032287153066136, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021038888953626156, + "epoch": 10.279883381924199, + "grad_norm": 0.15136873722076416, + "learning_rate": 1e-06, + "loss": 0.0035, + "step": 990 + }, + { + "clip_ratio/high_max": 0.002894303062930703, + "clip_ratio/high_mean": 0.001205394135467941, + "clip_ratio/low_mean": 0.0009282167047786061, + "clip_ratio/low_min": 3.288608422735706e-05, + "clip_ratio/region_mean": 0.002133610840246547, + "epoch": 10.289212827988338, + "grad_norm": 0.1624498814344406, + "learning_rate": 1e-06, + "loss": -0.0426, + "step": 991 + }, + { + "clip_ratio/high_max": 0.002337928242923226, + "clip_ratio/high_mean": 0.0010890516423387453, + "clip_ratio/low_mean": 0.0009289082154282369, + "clip_ratio/low_min": 5.011562643630896e-05, + "clip_ratio/region_mean": 0.0020179598650429398, + "epoch": 10.298542274052478, + "grad_norm": 0.13214270770549774, + "learning_rate": 1e-06, + "loss": -0.0424, + "step": 992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.074951171875, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4039.0, + "completions/mean_length": 799.0311889648438, + "completions/mean_terminated_length": 531.8975219726562, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "epoch": 10.307871720116617, + "grad_norm": 0.13746856153011322, + "learning_rate": 1e-06, + "loss": -0.029, + "num_tokens": 578202624.0, + "reward": 0.6584821939468384, + "reward_std": 0.14528833329677582, + "rewards/simpleverify_reward/mean": 0.6584821343421936, + "rewards/simpleverify_reward/std": 0.47422701120376587, + "step": 993 + }, + { + "clip_ratio/high_max": 0.0015481885375265847, + "clip_ratio/high_mean": 0.0006157521645491215, + "clip_ratio/low_mean": 0.00044636771599471103, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010621198707667645, + "epoch": 10.317201166180759, + "grad_norm": 0.14652684330940247, + "learning_rate": 1e-06, + "loss": -0.0046, + "step": 994 + }, + { + "clip_ratio/high_max": 0.0014798309166508261, + "clip_ratio/high_mean": 0.0005467086066346383, + "clip_ratio/low_mean": 0.0004052429840157856, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009519515879219398, + "epoch": 10.326530612244898, + "grad_norm": 0.13823182880878448, + "learning_rate": 1e-06, + "loss": -0.03, + "step": 995 + }, + { + "clip_ratio/high_max": 0.001953846891410649, + "clip_ratio/high_mean": 0.0008146686814143322, + "clip_ratio/low_mean": 0.0004966830947523704, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001311351777985692, + "epoch": 10.335860058309038, + "grad_norm": 0.16025929152965546, + "learning_rate": 1e-06, + "loss": -0.0394, + "step": 996 + }, + { + "clip_ratio/high_max": 0.0020486487228481565, + "clip_ratio/high_mean": 0.0006944371525605675, + "clip_ratio/low_mean": 0.0004157131415922777, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001110150309614255, + "epoch": 10.345189504373177, + "grad_norm": 0.17115995287895203, + "learning_rate": 1e-06, + "loss": -0.0344, + "step": 997 + }, + { + "clip_ratio/high_max": 0.0016736180223233532, + "clip_ratio/high_mean": 0.0005873534410056891, + "clip_ratio/low_mean": 0.000505985150084598, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010933385747193824, + "epoch": 10.354518950437317, + "grad_norm": 0.15793941915035248, + "learning_rate": 1e-06, + "loss": 0.0112, + "step": 998 + }, + { + "clip_ratio/high_max": 0.0020567545798257925, + "clip_ratio/high_mean": 0.0008095356388366781, + "clip_ratio/low_mean": 0.00044864863411930855, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012581843002408277, + "epoch": 10.363848396501458, + "grad_norm": 0.13997121155261993, + "learning_rate": 1e-06, + "loss": -0.0425, + "step": 999 + }, + { + "clip_ratio/high_max": 0.001971263758605346, + "clip_ratio/high_mean": 0.0007790296240273165, + "clip_ratio/low_mean": 0.0005122356722040422, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001291265331019531, + "epoch": 10.373177842565598, + "grad_norm": 0.13614171743392944, + "learning_rate": 1e-06, + "loss": -0.0523, + "step": 1000 + }, + { + "clip_ratio/high_max": 0.0017270641328650527, + "clip_ratio/high_mean": 0.0006994562040745222, + "clip_ratio/low_mean": 0.0005206879040997592, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012201440931676188, + "epoch": 10.382507288629737, + "grad_norm": 0.1247684508562088, + "learning_rate": 1e-06, + "loss": -0.0098, + "step": 1001 + }, + { + "clip_ratio/high_max": 0.0020272995679988526, + "clip_ratio/high_mean": 0.0008106346303975442, + "clip_ratio/low_mean": 0.0007284987614184502, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015391333799925633, + "epoch": 10.391836734693877, + "grad_norm": 0.1570490151643753, + "learning_rate": 1e-06, + "loss": -0.0212, + "step": 1002 + }, + { + "clip_ratio/high_max": 0.002310659623617539, + "clip_ratio/high_mean": 0.000947707203522441, + "clip_ratio/low_mean": 0.0005226368625699251, + "clip_ratio/low_min": 1.7085838408092968e-05, + "clip_ratio/region_mean": 0.001470344082918018, + "epoch": 10.401166180758018, + "grad_norm": 0.13579373061656952, + "learning_rate": 1e-06, + "loss": -0.0647, + "step": 1003 + }, + { + "clip_ratio/high_max": 0.002140488439181354, + "clip_ratio/high_mean": 0.0007807352449162863, + "clip_ratio/low_mean": 0.0006728586513418122, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014535938717017416, + "epoch": 10.410495626822158, + "grad_norm": 0.16249175369739532, + "learning_rate": 1e-06, + "loss": -0.0038, + "step": 1004 + }, + { + "clip_ratio/high_max": 0.0019618572914623655, + "clip_ratio/high_mean": 0.0007476367372873938, + "clip_ratio/low_mean": 0.0006206943739925919, + "clip_ratio/low_min": 2.3062730178935453e-05, + "clip_ratio/region_mean": 0.001368331093544839, + "epoch": 10.419825072886297, + "grad_norm": 0.13393568992614746, + "learning_rate": 1e-06, + "loss": -0.0501, + "step": 1005 + }, + { + "clip_ratio/high_max": 0.002194969914853573, + "clip_ratio/high_mean": 0.0008562507773604011, + "clip_ratio/low_mean": 0.0007611364399053855, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016173872318177018, + "epoch": 10.429154518950437, + "grad_norm": 0.1739315390586853, + "learning_rate": 1e-06, + "loss": -0.0046, + "step": 1006 + }, + { + "clip_ratio/high_max": 0.002169353683711961, + "clip_ratio/high_mean": 0.0008425281757808989, + "clip_ratio/low_mean": 0.0008274836281998432, + "clip_ratio/low_min": 1.9531249563442543e-05, + "clip_ratio/region_mean": 0.001670011792157311, + "epoch": 10.438483965014576, + "grad_norm": 0.13166306912899017, + "learning_rate": 1e-06, + "loss": 0.0061, + "step": 1007 + }, + { + "clip_ratio/high_max": 0.0024623638528282754, + "clip_ratio/high_mean": 0.0008831531267787796, + "clip_ratio/low_mean": 0.0006474850888480432, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001530638248368632, + "epoch": 10.447813411078718, + "grad_norm": 0.14744344353675842, + "learning_rate": 1e-06, + "loss": -0.045, + "step": 1008 + }, + { + "clip_ratio/high_max": 0.0020738662824442144, + "clip_ratio/high_mean": 0.0008392149939027149, + "clip_ratio/low_mean": 0.0006633309924382047, + "clip_ratio/low_min": 3.963221388403326e-05, + "clip_ratio/region_mean": 0.001502545925177401, + "epoch": 10.457142857142857, + "grad_norm": 0.13794247806072235, + "learning_rate": 1e-06, + "loss": -0.0291, + "step": 1009 + }, + { + "clip_ratio/high_max": 0.0019332764422870241, + "clip_ratio/high_mean": 0.000819532713649096, + "clip_ratio/low_mean": 0.0006383089366863715, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014578416448784992, + "epoch": 10.466472303206997, + "grad_norm": 0.1313249170780182, + "learning_rate": 1e-06, + "loss": -0.028, + "step": 1010 + }, + { + "clip_ratio/high_max": 0.0024970816666609608, + "clip_ratio/high_mean": 0.0010146628519578371, + "clip_ratio/low_mean": 0.0006212481612237752, + "clip_ratio/low_min": 1.6425756257376634e-05, + "clip_ratio/region_mean": 0.001635911063203821, + "epoch": 10.475801749271136, + "grad_norm": 0.15788981318473816, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 1011 + }, + { + "clip_ratio/high_max": 0.001872238477517385, + "clip_ratio/high_mean": 0.000791782209489611, + "clip_ratio/low_mean": 0.0007423156257573282, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015340978534368332, + "epoch": 10.485131195335278, + "grad_norm": 0.12612873315811157, + "learning_rate": 1e-06, + "loss": -0.0201, + "step": 1012 + }, + { + "clip_ratio/high_max": 0.002122684250934981, + "clip_ratio/high_mean": 0.0009315368479292374, + "clip_ratio/low_mean": 0.0006933887671038974, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00162492562958505, + "epoch": 10.494460641399417, + "grad_norm": 0.13648264110088348, + "learning_rate": 1e-06, + "loss": -0.0476, + "step": 1013 + }, + { + "clip_ratio/high_max": 0.0026277231590938754, + "clip_ratio/high_mean": 0.0010553726569924038, + "clip_ratio/low_mean": 0.0007376682660833467, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017930409012478776, + "epoch": 10.503790087463557, + "grad_norm": 0.15639662742614746, + "learning_rate": 1e-06, + "loss": -0.0353, + "step": 1014 + }, + { + "clip_ratio/high_max": 0.0022894211288075894, + "clip_ratio/high_mean": 0.0008977182860689936, + "clip_ratio/low_mean": 0.0006727097893417522, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015704280922363978, + "epoch": 10.513119533527696, + "grad_norm": 0.1324109584093094, + "learning_rate": 1e-06, + "loss": -0.047, + "step": 1015 + }, + { + "clip_ratio/high_max": 0.0024130656456691213, + "clip_ratio/high_mean": 0.0010579552144918125, + "clip_ratio/low_mean": 0.0007388108697341522, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017967660933209118, + "epoch": 10.522448979591836, + "grad_norm": 0.1491599828004837, + "learning_rate": 1e-06, + "loss": -0.0614, + "step": 1016 + }, + { + "clip_ratio/high_max": 0.002259333457914181, + "clip_ratio/high_mean": 0.0008822530953693786, + "clip_ratio/low_mean": 0.0005619981329800794, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014442512001551222, + "epoch": 10.531778425655977, + "grad_norm": 0.12334480881690979, + "learning_rate": 1e-06, + "loss": -0.0539, + "step": 1017 + }, + { + "clip_ratio/high_max": 0.002269917298690416, + "clip_ratio/high_mean": 0.0009123131312662736, + "clip_ratio/low_mean": 0.0007771314358251402, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001689444536168594, + "epoch": 10.541107871720117, + "grad_norm": 0.13667048513889313, + "learning_rate": 1e-06, + "loss": -0.0554, + "step": 1018 + }, + { + "clip_ratio/high_max": 0.0021292364071996417, + "clip_ratio/high_mean": 0.0008424350307905115, + "clip_ratio/low_mean": 0.0006537908955124294, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014962259156163782, + "epoch": 10.550437317784256, + "grad_norm": 0.12534432113170624, + "learning_rate": 1e-06, + "loss": -0.0401, + "step": 1019 + }, + { + "clip_ratio/high_max": 0.002169070368836401, + "clip_ratio/high_mean": 0.0008813484819256701, + "clip_ratio/low_mean": 0.0007044645008136285, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015858130136621185, + "epoch": 10.559766763848396, + "grad_norm": 0.15574602782726288, + "learning_rate": 1e-06, + "loss": -0.0144, + "step": 1020 + }, + { + "clip_ratio/high_max": 0.002250806392112281, + "clip_ratio/high_mean": 0.0009773394449439365, + "clip_ratio/low_mean": 0.0006898111887494451, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016671506164129823, + "epoch": 10.569096209912537, + "grad_norm": 0.15688715875148773, + "learning_rate": 1e-06, + "loss": -0.0297, + "step": 1021 + }, + { + "clip_ratio/high_max": 0.002465681689500343, + "clip_ratio/high_mean": 0.0009920224474626593, + "clip_ratio/low_mean": 0.0007774367732054088, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017694592170300893, + "epoch": 10.578425655976677, + "grad_norm": 0.13506655395030975, + "learning_rate": 1e-06, + "loss": -0.0508, + "step": 1022 + }, + { + "clip_ratio/high_max": 0.0027156087235198356, + "clip_ratio/high_mean": 0.001017479526126408, + "clip_ratio/low_mean": 0.0008608054013166111, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001878284921986051, + "epoch": 10.587755102040816, + "grad_norm": 0.14973346889019012, + "learning_rate": 1e-06, + "loss": -0.0209, + "step": 1023 + }, + { + "clip_ratio/high_max": 0.002389958310232032, + "clip_ratio/high_mean": 0.0010566878281679237, + "clip_ratio/low_mean": 0.0008665779268994811, + "clip_ratio/low_min": 4.5609340304508805e-05, + "clip_ratio/region_mean": 0.0019232657468819525, + "epoch": 10.597084548104956, + "grad_norm": 11.152640342712402, + "learning_rate": 1e-06, + "loss": -0.0647, + "step": 1024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0752650669642857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4062.0, + "completions/mean_length": 803.6248168945312, + "completions/mean_terminated_length": 535.6552124023438, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "epoch": 10.606413994169095, + "grad_norm": 0.1410028487443924, + "learning_rate": 1e-06, + "loss": 0.0005, + "num_tokens": 595257418.0, + "reward": 0.6659110188484192, + "reward_std": 0.15039822459220886, + "rewards/simpleverify_reward/mean": 0.6659110188484192, + "rewards/simpleverify_reward/std": 0.4716792404651642, + "step": 1025 + }, + { + "clip_ratio/high_max": 0.002021494052314665, + "clip_ratio/high_mean": 0.0007912164637673413, + "clip_ratio/low_mean": 0.00037528156758526166, + "clip_ratio/low_min": 1.2195121598779224e-05, + "clip_ratio/region_mean": 0.0011664980265777558, + "epoch": 10.615743440233237, + "grad_norm": 0.14194558560848236, + "learning_rate": 1e-06, + "loss": -0.0791, + "step": 1026 + }, + { + "clip_ratio/high_max": 0.0018872605214710347, + "clip_ratio/high_mean": 0.0007259628055180656, + "clip_ratio/low_mean": 0.00042551893329800805, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011514817488205153, + "epoch": 10.625072886297376, + "grad_norm": 0.14285390079021454, + "learning_rate": 1e-06, + "loss": -0.0176, + "step": 1027 + }, + { + "clip_ratio/high_max": 0.0016201781836571172, + "clip_ratio/high_mean": 0.0006363311986206099, + "clip_ratio/low_mean": 0.00041314868667541305, + "clip_ratio/low_min": 1.901429823192302e-05, + "clip_ratio/region_mean": 0.0010494798443687614, + "epoch": 10.634402332361516, + "grad_norm": 0.13606666028499603, + "learning_rate": 1e-06, + "loss": -0.0602, + "step": 1028 + }, + { + "clip_ratio/high_max": 0.002066865847154986, + "clip_ratio/high_mean": 0.0008451949615846388, + "clip_ratio/low_mean": 0.00043474988342495635, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012799448377336375, + "epoch": 10.643731778425655, + "grad_norm": 0.13159887492656708, + "learning_rate": 1e-06, + "loss": -0.0591, + "step": 1029 + }, + { + "clip_ratio/high_max": 0.002269327269459609, + "clip_ratio/high_mean": 0.0009070521882676985, + "clip_ratio/low_mean": 0.00044856136355519993, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013556135527323931, + "epoch": 10.653061224489797, + "grad_norm": 0.14049817621707916, + "learning_rate": 1e-06, + "loss": -0.0614, + "step": 1030 + }, + { + "clip_ratio/high_max": 0.0023682578103034757, + "clip_ratio/high_mean": 0.0008562794646422844, + "clip_ratio/low_mean": 0.0005246619630270288, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013809413976559881, + "epoch": 10.662390670553936, + "grad_norm": 0.15171289443969727, + "learning_rate": 1e-06, + "loss": 0.012, + "step": 1031 + }, + { + "clip_ratio/high_max": 0.0021879767955397256, + "clip_ratio/high_mean": 0.0007816203960828716, + "clip_ratio/low_mean": 0.0005043730348006648, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012859934104199056, + "epoch": 10.671720116618076, + "grad_norm": 0.15205611288547516, + "learning_rate": 1e-06, + "loss": -0.0093, + "step": 1032 + }, + { + "clip_ratio/high_max": 0.0019532801998138893, + "clip_ratio/high_mean": 0.0007974514246598119, + "clip_ratio/low_mean": 0.0005037566470491583, + "clip_ratio/low_min": 1.664447336224839e-05, + "clip_ratio/region_mean": 0.0013012080526095815, + "epoch": 10.681049562682215, + "grad_norm": 15584.228515625, + "learning_rate": 1e-06, + "loss": 0.5, + "step": 1033 + }, + { + "clip_ratio/high_max": 0.0018114284139301162, + "clip_ratio/high_mean": 0.0007672809588257223, + "clip_ratio/low_mean": 0.0005547672235479695, + "clip_ratio/low_min": 1.0955302059301175e-05, + "clip_ratio/region_mean": 0.0013220481705502607, + "epoch": 10.690379008746355, + "grad_norm": 0.171482652425766, + "learning_rate": 1e-06, + "loss": -0.0325, + "step": 1034 + }, + { + "clip_ratio/high_max": 0.0020000563999929, + "clip_ratio/high_mean": 0.0007531167448178167, + "clip_ratio/low_mean": 0.0004810703903785907, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001234187151567312, + "epoch": 10.699708454810496, + "grad_norm": 0.2236158698797226, + "learning_rate": 1e-06, + "loss": -0.0238, + "step": 1035 + }, + { + "clip_ratio/high_max": 0.0020849624852417037, + "clip_ratio/high_mean": 0.0008802694610494655, + "clip_ratio/low_mean": 0.0006394156494025083, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015196851218206575, + "epoch": 10.709037900874636, + "grad_norm": 0.5721369981765747, + "learning_rate": 1e-06, + "loss": -0.0393, + "step": 1036 + }, + { + "clip_ratio/high_max": 0.0017929532914422452, + "clip_ratio/high_mean": 0.0008235112454713089, + "clip_ratio/low_mean": 0.0007058635137582314, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015293747892428655, + "epoch": 10.718367346938775, + "grad_norm": 0.14972050487995148, + "learning_rate": 1e-06, + "loss": 0.0004, + "step": 1037 + }, + { + "clip_ratio/high_max": 0.002275098788231844, + "clip_ratio/high_mean": 0.00086568585538771, + "clip_ratio/low_mean": 0.0006933855574970949, + "clip_ratio/low_min": 4.1519044316373765e-05, + "clip_ratio/region_mean": 0.0015590714174322784, + "epoch": 10.727696793002915, + "grad_norm": 0.14840513467788696, + "learning_rate": 1e-06, + "loss": -0.0305, + "step": 1038 + }, + { + "clip_ratio/high_max": 0.002317817572475178, + "clip_ratio/high_mean": 0.0008742880145291565, + "clip_ratio/low_mean": 0.00068940183155064, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015636898024240509, + "epoch": 10.737026239067056, + "grad_norm": 0.8079626560211182, + "learning_rate": 1e-06, + "loss": 0.0016, + "step": 1039 + }, + { + "clip_ratio/high_max": 0.0022558583441423252, + "clip_ratio/high_mean": 0.0009273416599171469, + "clip_ratio/low_mean": 0.0007737451942375628, + "clip_ratio/low_min": 1.4507892046822235e-05, + "clip_ratio/region_mean": 0.0017010868796205614, + "epoch": 10.746355685131196, + "grad_norm": 0.14360742270946503, + "learning_rate": 1e-06, + "loss": -0.036, + "step": 1040 + }, + { + "clip_ratio/high_max": 0.0023380135316983797, + "clip_ratio/high_mean": 0.0010714017262216657, + "clip_ratio/low_mean": 0.0007289969998964807, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018003987133852206, + "epoch": 10.755685131195335, + "grad_norm": 1.0572714805603027, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 1041 + }, + { + "clip_ratio/high_max": 0.0025086066671065055, + "clip_ratio/high_mean": 0.0009457175438001286, + "clip_ratio/low_mean": 0.0006730296145178727, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016187471919693053, + "epoch": 10.765014577259475, + "grad_norm": 0.13252829015254974, + "learning_rate": 1e-06, + "loss": -0.0452, + "step": 1042 + }, + { + "clip_ratio/high_max": 0.002531970283598639, + "clip_ratio/high_mean": 0.0009256004741473589, + "clip_ratio/low_mean": 0.0007404327625408769, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016660332257742994, + "epoch": 10.774344023323614, + "grad_norm": 0.1442527323961258, + "learning_rate": 1e-06, + "loss": -0.0367, + "step": 1043 + }, + { + "clip_ratio/high_max": 0.0022089687918196432, + "clip_ratio/high_mean": 0.0007793087006575661, + "clip_ratio/low_mean": 0.0007593696791445836, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015386783597932663, + "epoch": 10.783673469387756, + "grad_norm": 0.12284589558839798, + "learning_rate": 1e-06, + "loss": -0.0211, + "step": 1044 + }, + { + "clip_ratio/high_max": 0.0022242231389100198, + "clip_ratio/high_mean": 0.0010228058781649452, + "clip_ratio/low_mean": 0.0008191006645574817, + "clip_ratio/low_min": 2.860411950678099e-05, + "clip_ratio/region_mean": 0.001841906581830699, + "epoch": 10.793002915451895, + "grad_norm": 0.15630225837230682, + "learning_rate": 1e-06, + "loss": -0.0672, + "step": 1045 + }, + { + "clip_ratio/high_max": 0.002444708348775748, + "clip_ratio/high_mean": 0.0009511769749224186, + "clip_ratio/low_mean": 0.00080776714366948, + "clip_ratio/low_min": 1.2603347386175301e-05, + "clip_ratio/region_mean": 0.0017589441340533085, + "epoch": 10.802332361516035, + "grad_norm": 0.16826385259628296, + "learning_rate": 1e-06, + "loss": -0.004, + "step": 1046 + }, + { + "clip_ratio/high_max": 0.0023710838868282735, + "clip_ratio/high_mean": 0.0009455571434955345, + "clip_ratio/low_mean": 0.0007226421157611185, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016681992783560418, + "epoch": 10.811661807580174, + "grad_norm": 0.16420753300189972, + "learning_rate": 1e-06, + "loss": -0.0201, + "step": 1047 + }, + { + "clip_ratio/high_max": 0.002061777740891557, + "clip_ratio/high_mean": 0.0008383155145565979, + "clip_ratio/low_mean": 0.0008746818220970454, + "clip_ratio/low_min": 5.1062092097708955e-05, + "clip_ratio/region_mean": 0.001712997331196675, + "epoch": 10.820991253644316, + "grad_norm": 0.1311642974615097, + "learning_rate": 1e-06, + "loss": 0.0009, + "step": 1048 + }, + { + "clip_ratio/high_max": 0.0023175532405730337, + "clip_ratio/high_mean": 0.0009104331784328679, + "clip_ratio/low_mean": 0.0007680237549720914, + "clip_ratio/low_min": 1.806358341127634e-05, + "clip_ratio/region_mean": 0.00167845691612456, + "epoch": 10.830320699708455, + "grad_norm": 0.14354373514652252, + "learning_rate": 1e-06, + "loss": -0.0456, + "step": 1049 + }, + { + "clip_ratio/high_max": 0.0026579302721074782, + "clip_ratio/high_mean": 0.0010823156262631528, + "clip_ratio/low_mean": 0.0008512995691489778, + "clip_ratio/low_min": 3.4530385164543986e-05, + "clip_ratio/region_mean": 0.0019336151381139643, + "epoch": 10.839650145772595, + "grad_norm": 0.1458643078804016, + "learning_rate": 1e-06, + "loss": -0.0411, + "step": 1050 + }, + { + "clip_ratio/high_max": 0.002385486623097677, + "clip_ratio/high_mean": 0.0009239543069270439, + "clip_ratio/low_mean": 0.0006246837492653867, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015486380470974836, + "epoch": 10.848979591836734, + "grad_norm": 0.12350092083215714, + "learning_rate": 1e-06, + "loss": -0.0407, + "step": 1051 + }, + { + "clip_ratio/high_max": 0.002507519064238295, + "clip_ratio/high_mean": 0.001014399287669221, + "clip_ratio/low_mean": 0.0009685164277470903, + "clip_ratio/low_min": 2.2030313630239107e-05, + "clip_ratio/region_mean": 0.0019829157354251947, + "epoch": 10.858309037900874, + "grad_norm": 0.1365729421377182, + "learning_rate": 1e-06, + "loss": -0.0484, + "step": 1052 + }, + { + "clip_ratio/high_max": 0.0026707619763328694, + "clip_ratio/high_mean": 0.0010016462292696815, + "clip_ratio/low_mean": 0.0008648984112369362, + "clip_ratio/low_min": 2.8014343115501106e-05, + "clip_ratio/region_mean": 0.0018665446332306601, + "epoch": 10.867638483965015, + "grad_norm": 0.17057707905769348, + "learning_rate": 1e-06, + "loss": -0.0409, + "step": 1053 + }, + { + "clip_ratio/high_max": 0.002549129589169752, + "clip_ratio/high_mean": 0.0010482151537871687, + "clip_ratio/low_mean": 0.0009107267833314836, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019589419753174298, + "epoch": 10.876967930029155, + "grad_norm": 0.32911109924316406, + "learning_rate": 1e-06, + "loss": -0.0245, + "step": 1054 + }, + { + "clip_ratio/high_max": 0.002313146091182716, + "clip_ratio/high_mean": 0.000970059651081101, + "clip_ratio/low_mean": 0.0009378099002788076, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019078695331700146, + "epoch": 10.886297376093294, + "grad_norm": 0.14788420498371124, + "learning_rate": 1e-06, + "loss": -0.0046, + "step": 1055 + }, + { + "clip_ratio/high_max": 0.002571078708569985, + "clip_ratio/high_mean": 0.00102688998413214, + "clip_ratio/low_mean": 0.0008060240352278925, + "clip_ratio/low_min": 1.539029835839756e-05, + "clip_ratio/region_mean": 0.0018329140439163893, + "epoch": 10.895626822157434, + "grad_norm": 0.14282891154289246, + "learning_rate": 1e-06, + "loss": -0.0094, + "step": 1056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0776715959821429, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4023.0, + "completions/mean_length": 813.9260864257812, + "completions/mean_terminated_length": 537.5343627929688, + "completions/min_length": 96.0, + "completions/min_terminated_length": 96.0, + "epoch": 11.00932944606414, + "grad_norm": 0.15120987594127655, + "learning_rate": 1e-06, + "loss": -0.0189, + "num_tokens": 612352642.0, + "reward": 0.6597726345062256, + "reward_std": 0.1406591534614563, + "rewards/simpleverify_reward/mean": 0.6597725749015808, + "rewards/simpleverify_reward/std": 0.47379374504089355, + "step": 1057 + }, + { + "clip_ratio/high_max": 0.0015920113619358744, + "clip_ratio/high_mean": 0.0006614641097257845, + "clip_ratio/low_mean": 0.0003684297939798853, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010298939087078907, + "epoch": 11.018658892128279, + "grad_norm": 0.12487295269966125, + "learning_rate": 1e-06, + "loss": -0.042, + "step": 1058 + }, + { + "clip_ratio/high_max": 0.0018454749588272534, + "clip_ratio/high_mean": 0.0007487962429877371, + "clip_ratio/low_mean": 0.00045943752138555283, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012082337161700707, + "epoch": 11.02798833819242, + "grad_norm": 0.1372573971748352, + "learning_rate": 1e-06, + "loss": -0.0206, + "step": 1059 + }, + { + "clip_ratio/high_max": 0.0018122699639206985, + "clip_ratio/high_mean": 0.0006949476301087998, + "clip_ratio/low_mean": 0.0004402407466841396, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011351884058967698, + "epoch": 11.03731778425656, + "grad_norm": 0.12935875356197357, + "learning_rate": 1e-06, + "loss": -0.0111, + "step": 1060 + }, + { + "clip_ratio/high_max": 0.0018673782069527078, + "clip_ratio/high_mean": 0.0007298047512449557, + "clip_ratio/low_mean": 0.00034867336216848344, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010784781370603014, + "epoch": 11.0466472303207, + "grad_norm": 0.1439608484506607, + "learning_rate": 1e-06, + "loss": -0.1065, + "step": 1061 + }, + { + "clip_ratio/high_max": 0.0016545736907573882, + "clip_ratio/high_mean": 0.0006580228446182446, + "clip_ratio/low_mean": 0.0004363252883194946, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010943481265712762, + "epoch": 11.055976676384839, + "grad_norm": 0.14292700588703156, + "learning_rate": 1e-06, + "loss": -0.0589, + "step": 1062 + }, + { + "clip_ratio/high_max": 0.002028540722676553, + "clip_ratio/high_mean": 0.000700796017099492, + "clip_ratio/low_mean": 0.0006795988219892024, + "clip_ratio/low_min": 2.517019493097905e-05, + "clip_ratio/region_mean": 0.001380394820444053, + "epoch": 11.06530612244898, + "grad_norm": 1.630363941192627, + "learning_rate": 1e-06, + "loss": 0.0159, + "step": 1063 + }, + { + "clip_ratio/high_max": 0.002253053469758015, + "clip_ratio/high_mean": 0.0008082645563263213, + "clip_ratio/low_mean": 0.0005325971733327606, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013408617378445342, + "epoch": 11.07463556851312, + "grad_norm": 0.15001767873764038, + "learning_rate": 1e-06, + "loss": -0.0407, + "step": 1064 + }, + { + "clip_ratio/high_max": 0.0017601002182345837, + "clip_ratio/high_mean": 0.0006535378470289288, + "clip_ratio/low_mean": 0.0005012225556129124, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011547603953658836, + "epoch": 11.08396501457726, + "grad_norm": 0.12722033262252808, + "learning_rate": 1e-06, + "loss": -0.018, + "step": 1065 + }, + { + "clip_ratio/high_max": 0.0016888787613424938, + "clip_ratio/high_mean": 0.000663585405163758, + "clip_ratio/low_mean": 0.0005574824645009357, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001221067886945093, + "epoch": 11.093294460641399, + "grad_norm": 0.1354394406080246, + "learning_rate": 1e-06, + "loss": -0.0403, + "step": 1066 + }, + { + "clip_ratio/high_max": 0.002266250623506494, + "clip_ratio/high_mean": 0.0008081148334895261, + "clip_ratio/low_mean": 0.0005596310902546975, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001367745946481591, + "epoch": 11.102623906705539, + "grad_norm": 0.13672518730163574, + "learning_rate": 1e-06, + "loss": -0.0069, + "step": 1067 + }, + { + "clip_ratio/high_max": 0.0021165791622479446, + "clip_ratio/high_mean": 0.0008931939355534269, + "clip_ratio/low_mean": 0.0007323976897168905, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016255916525551584, + "epoch": 11.11195335276968, + "grad_norm": 0.16483911871910095, + "learning_rate": 1e-06, + "loss": -0.0311, + "step": 1068 + }, + { + "clip_ratio/high_max": 0.001983118178031873, + "clip_ratio/high_mean": 0.0007438092798111029, + "clip_ratio/low_mean": 0.0008516333882653271, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015954426853568293, + "epoch": 11.12128279883382, + "grad_norm": 0.1664646863937378, + "learning_rate": 1e-06, + "loss": -0.0162, + "step": 1069 + }, + { + "clip_ratio/high_max": 0.0021535297055379488, + "clip_ratio/high_mean": 0.0008280100446427241, + "clip_ratio/low_mean": 0.0005900636169826612, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00141807364707347, + "epoch": 11.130612244897959, + "grad_norm": 0.195279061794281, + "learning_rate": 1e-06, + "loss": -0.0162, + "step": 1070 + }, + { + "clip_ratio/high_max": 0.0019865077701979317, + "clip_ratio/high_mean": 0.0008226514473790303, + "clip_ratio/low_mean": 0.0006189503556015552, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014416018129850272, + "epoch": 11.139941690962099, + "grad_norm": 0.15905873477458954, + "learning_rate": 1e-06, + "loss": -0.0485, + "step": 1071 + }, + { + "clip_ratio/high_max": 0.00243154483177932, + "clip_ratio/high_mean": 0.0009928193921950879, + "clip_ratio/low_mean": 0.0006957122373023594, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016885316363186575, + "epoch": 11.14927113702624, + "grad_norm": 0.13798582553863525, + "learning_rate": 1e-06, + "loss": -0.0599, + "step": 1072 + }, + { + "clip_ratio/high_max": 0.0019647090084617957, + "clip_ratio/high_mean": 0.000768944821174955, + "clip_ratio/low_mean": 0.0008300694935314823, + "clip_ratio/low_min": 3.128128082607873e-05, + "clip_ratio/region_mean": 0.0015990143037925009, + "epoch": 11.15860058309038, + "grad_norm": 0.16089555621147156, + "learning_rate": 1e-06, + "loss": -0.0308, + "step": 1073 + }, + { + "clip_ratio/high_max": 0.002555462058808189, + "clip_ratio/high_mean": 0.0009445642044738634, + "clip_ratio/low_mean": 0.0009199578753396054, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018645220552571118, + "epoch": 11.167930029154519, + "grad_norm": 0.15208464860916138, + "learning_rate": 1e-06, + "loss": -0.0245, + "step": 1074 + }, + { + "clip_ratio/high_max": 0.002173075459722895, + "clip_ratio/high_mean": 0.0008532288757123752, + "clip_ratio/low_mean": 0.001072563537491078, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019257924432167783, + "epoch": 11.177259475218658, + "grad_norm": 0.13196150958538055, + "learning_rate": 1e-06, + "loss": -0.0531, + "step": 1075 + }, + { + "clip_ratio/high_max": 0.0022783958047511987, + "clip_ratio/high_mean": 0.0008463830054097343, + "clip_ratio/low_mean": 0.0007740050641587004, + "clip_ratio/low_min": 5.8647666264732834e-05, + "clip_ratio/region_mean": 0.0016203880804823712, + "epoch": 11.186588921282798, + "grad_norm": 0.15812310576438904, + "learning_rate": 1e-06, + "loss": -0.0209, + "step": 1076 + }, + { + "clip_ratio/high_max": 0.0025219095041393302, + "clip_ratio/high_mean": 0.0008976641238405136, + "clip_ratio/low_mean": 0.0009242955366062233, + "clip_ratio/low_min": 7.02002398611512e-05, + "clip_ratio/region_mean": 0.0018219596859125886, + "epoch": 11.19591836734694, + "grad_norm": 0.14058281481266022, + "learning_rate": 1e-06, + "loss": -0.0031, + "step": 1077 + }, + { + "clip_ratio/high_max": 0.0021862751091248356, + "clip_ratio/high_mean": 0.0009059881931534619, + "clip_ratio/low_mean": 0.0009726934404170606, + "clip_ratio/low_min": 1.5151515071920585e-05, + "clip_ratio/region_mean": 0.0018786816399369854, + "epoch": 11.205247813411079, + "grad_norm": 0.13323284685611725, + "learning_rate": 1e-06, + "loss": -0.0022, + "step": 1078 + }, + { + "clip_ratio/high_max": 0.002862885819922667, + "clip_ratio/high_mean": 0.0011893186674569733, + "clip_ratio/low_mean": 0.0008400276219617808, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020293462694098707, + "epoch": 11.214577259475218, + "grad_norm": 0.12973734736442566, + "learning_rate": 1e-06, + "loss": -0.0907, + "step": 1079 + }, + { + "clip_ratio/high_max": 0.002883542016206775, + "clip_ratio/high_mean": 0.0010056324135803152, + "clip_ratio/low_mean": 0.0008221942205182131, + "clip_ratio/low_min": 1.2067966963513754e-05, + "clip_ratio/region_mean": 0.0018278266215929762, + "epoch": 11.223906705539358, + "grad_norm": 0.1280481070280075, + "learning_rate": 1e-06, + "loss": -0.0327, + "step": 1080 + }, + { + "clip_ratio/high_max": 0.0023884848778834566, + "clip_ratio/high_mean": 0.0009425718526472338, + "clip_ratio/low_mean": 0.0008561756294511724, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017987474566325545, + "epoch": 11.2332361516035, + "grad_norm": 0.16232584416866302, + "learning_rate": 1e-06, + "loss": -0.0307, + "step": 1081 + }, + { + "clip_ratio/high_max": 0.0026021743105957285, + "clip_ratio/high_mean": 0.001054947157172137, + "clip_ratio/low_mean": 0.0008475461308989907, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019024933026230428, + "epoch": 11.242565597667639, + "grad_norm": 0.1246662512421608, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 1082 + }, + { + "clip_ratio/high_max": 0.0024336452333955094, + "clip_ratio/high_mean": 0.0009560207709000679, + "clip_ratio/low_mean": 0.0007528945643571205, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017089152934204321, + "epoch": 11.251895043731778, + "grad_norm": 0.16376838088035583, + "learning_rate": 1e-06, + "loss": -0.046, + "step": 1083 + }, + { + "clip_ratio/high_max": 0.002522799048165325, + "clip_ratio/high_mean": 0.001010906449664617, + "clip_ratio/low_mean": 0.0008124366049742093, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018233430819236673, + "epoch": 11.261224489795918, + "grad_norm": 0.13568460941314697, + "learning_rate": 1e-06, + "loss": -0.0469, + "step": 1084 + }, + { + "clip_ratio/high_max": 0.002387663222179981, + "clip_ratio/high_mean": 0.0009682664513093187, + "clip_ratio/low_mean": 0.0007788152188368258, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017470816892455332, + "epoch": 11.270553935860057, + "grad_norm": 0.12385565042495728, + "learning_rate": 1e-06, + "loss": -0.0633, + "step": 1085 + }, + { + "clip_ratio/high_max": 0.0021982938997098245, + "clip_ratio/high_mean": 0.0009584870022081304, + "clip_ratio/low_mean": 0.0010174376839131583, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019759247079491615, + "epoch": 11.279883381924199, + "grad_norm": 0.14474475383758545, + "learning_rate": 1e-06, + "loss": -0.0035, + "step": 1086 + }, + { + "clip_ratio/high_max": 0.002302657645486761, + "clip_ratio/high_mean": 0.0010387126931163948, + "clip_ratio/low_mean": 0.0007650050119991647, + "clip_ratio/low_min": 3.943217598134652e-05, + "clip_ratio/region_mean": 0.0018037176887446549, + "epoch": 11.289212827988338, + "grad_norm": 0.13189628720283508, + "learning_rate": 1e-06, + "loss": -0.0493, + "step": 1087 + }, + { + "clip_ratio/high_max": 0.0030264778033597395, + "clip_ratio/high_mean": 0.0011500599903229158, + "clip_ratio/low_mean": 0.0010938255545624997, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0022438855885411613, + "epoch": 11.298542274052478, + "grad_norm": 0.14285577833652496, + "learning_rate": 1e-06, + "loss": -0.0025, + "step": 1088 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0746023995535714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4005.0, + "completions/mean_length": 801.2931518554688, + "completions/mean_terminated_length": 535.6849975585938, + "completions/min_length": 93.0, + "completions/min_terminated_length": 93.0, + "epoch": 11.307871720116617, + "grad_norm": 0.1754704862833023, + "learning_rate": 1e-06, + "loss": -0.0253, + "num_tokens": 629390973.0, + "reward": 0.6665388345718384, + "reward_std": 0.14645832777023315, + "rewards/simpleverify_reward/mean": 0.6665387749671936, + "rewards/simpleverify_reward/std": 0.47145789861679077, + "step": 1089 + }, + { + "clip_ratio/high_max": 0.0017820471184677444, + "clip_ratio/high_mean": 0.0006932171554581146, + "clip_ratio/low_mean": 0.00031046748927110457, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010036846579168923, + "epoch": 11.317201166180759, + "grad_norm": 0.1414964497089386, + "learning_rate": 1e-06, + "loss": -0.0402, + "step": 1090 + }, + { + "clip_ratio/high_max": 0.001742104286677204, + "clip_ratio/high_mean": 0.0007483205354219535, + "clip_ratio/low_mean": 0.00037099295786902076, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011193134923814796, + "epoch": 11.326530612244898, + "grad_norm": 0.14169956743717194, + "learning_rate": 1e-06, + "loss": -0.0568, + "step": 1091 + }, + { + "clip_ratio/high_max": 0.0015461074726772495, + "clip_ratio/high_mean": 0.0006735009446856566, + "clip_ratio/low_mean": 0.0004402636313898256, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011137645651615458, + "epoch": 11.335860058309038, + "grad_norm": 0.11127534508705139, + "learning_rate": 1e-06, + "loss": -0.0299, + "step": 1092 + }, + { + "clip_ratio/high_max": 0.0017622153463889845, + "clip_ratio/high_mean": 0.000721482259905315, + "clip_ratio/low_mean": 0.00044460590356720786, + "clip_ratio/low_min": 1.5042118320707232e-05, + "clip_ratio/region_mean": 0.001166088150057476, + "epoch": 11.345189504373177, + "grad_norm": 0.14358475804328918, + "learning_rate": 1e-06, + "loss": -0.0506, + "step": 1093 + }, + { + "clip_ratio/high_max": 0.002066727141937008, + "clip_ratio/high_mean": 0.0008654190587549238, + "clip_ratio/low_mean": 0.000463999939711357, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013294189920998178, + "epoch": 11.354518950437317, + "grad_norm": 0.1574108600616455, + "learning_rate": 1e-06, + "loss": -0.0665, + "step": 1094 + }, + { + "clip_ratio/high_max": 0.0022268158791121095, + "clip_ratio/high_mean": 0.0008985387466964312, + "clip_ratio/low_mean": 0.0005623355675652419, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014608743585995398, + "epoch": 11.363848396501458, + "grad_norm": 0.14443717896938324, + "learning_rate": 1e-06, + "loss": -0.079, + "step": 1095 + }, + { + "clip_ratio/high_max": 0.0020587054823408835, + "clip_ratio/high_mean": 0.0007668820617254823, + "clip_ratio/low_mean": 0.0005758043498644838, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001342686424322892, + "epoch": 11.373177842565598, + "grad_norm": 0.1323336809873581, + "learning_rate": 1e-06, + "loss": -0.0268, + "step": 1096 + }, + { + "clip_ratio/high_max": 0.0019139542564516887, + "clip_ratio/high_mean": 0.0007136751073630876, + "clip_ratio/low_mean": 0.0005419626295406488, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001255637744179694, + "epoch": 11.382507288629737, + "grad_norm": 0.16975180804729462, + "learning_rate": 1e-06, + "loss": -0.0369, + "step": 1097 + }, + { + "clip_ratio/high_max": 0.0016596432360529434, + "clip_ratio/high_mean": 0.0006236433364392724, + "clip_ratio/low_mean": 0.0006460134773078607, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012696568155661225, + "epoch": 11.391836734693877, + "grad_norm": 0.14871634542942047, + "learning_rate": 1e-06, + "loss": 0.0164, + "step": 1098 + }, + { + "clip_ratio/high_max": 0.0024096415436360985, + "clip_ratio/high_mean": 0.0008074062152445549, + "clip_ratio/low_mean": 0.0005820828559990332, + "clip_ratio/low_min": 1.2254901776032057e-05, + "clip_ratio/region_mean": 0.0013894890835217666, + "epoch": 11.401166180758018, + "grad_norm": 0.1535893678665161, + "learning_rate": 1e-06, + "loss": -0.0605, + "step": 1099 + }, + { + "clip_ratio/high_max": 0.0019717682516784407, + "clip_ratio/high_mean": 0.0007068977811286459, + "clip_ratio/low_mean": 0.0006789030367144733, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001385800806019688, + "epoch": 11.410495626822158, + "grad_norm": 0.15019330382347107, + "learning_rate": 1e-06, + "loss": -0.0229, + "step": 1100 + }, + { + "clip_ratio/high_max": 0.002162405078706797, + "clip_ratio/high_mean": 0.0009733231090649497, + "clip_ratio/low_mean": 0.0006966889504838036, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016700120613677427, + "epoch": 11.419825072886297, + "grad_norm": 0.14072251319885254, + "learning_rate": 1e-06, + "loss": -0.0583, + "step": 1101 + }, + { + "clip_ratio/high_max": 0.0024383579584537074, + "clip_ratio/high_mean": 0.0009564513356963289, + "clip_ratio/low_mean": 0.0007808191749063553, + "clip_ratio/low_min": 2.9531922336900607e-05, + "clip_ratio/region_mean": 0.0017372705551679246, + "epoch": 11.429154518950437, + "grad_norm": 0.15458250045776367, + "learning_rate": 1e-06, + "loss": -0.0182, + "step": 1102 + }, + { + "clip_ratio/high_max": 0.0020752936543431133, + "clip_ratio/high_mean": 0.0009176840958389221, + "clip_ratio/low_mean": 0.0007021674146017176, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016198515222640708, + "epoch": 11.438483965014576, + "grad_norm": 0.13857978582382202, + "learning_rate": 1e-06, + "loss": -0.0262, + "step": 1103 + }, + { + "clip_ratio/high_max": 0.0022316328904707916, + "clip_ratio/high_mean": 0.0009267121322409366, + "clip_ratio/low_mean": 0.0006105196152930148, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001537231779366266, + "epoch": 11.447813411078718, + "grad_norm": 0.12467081099748611, + "learning_rate": 1e-06, + "loss": -0.0285, + "step": 1104 + }, + { + "clip_ratio/high_max": 0.00216190998617094, + "clip_ratio/high_mean": 0.0008915891085052863, + "clip_ratio/low_mean": 0.0007676189543417422, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016592080792179331, + "epoch": 11.457142857142857, + "grad_norm": 0.14669372141361237, + "learning_rate": 1e-06, + "loss": -0.0145, + "step": 1105 + }, + { + "clip_ratio/high_max": 0.0025387482783116866, + "clip_ratio/high_mean": 0.0009284275101890671, + "clip_ratio/low_mean": 0.0005932793610554654, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015217068685160484, + "epoch": 11.466472303206997, + "grad_norm": 0.124748095870018, + "learning_rate": 1e-06, + "loss": -0.0241, + "step": 1106 + }, + { + "clip_ratio/high_max": 0.002182352251111297, + "clip_ratio/high_mean": 0.0008965330180217279, + "clip_ratio/low_mean": 0.0006304568923951592, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015269898794940673, + "epoch": 11.475801749271136, + "grad_norm": 0.14877058565616608, + "learning_rate": 1e-06, + "loss": -0.0382, + "step": 1107 + }, + { + "clip_ratio/high_max": 0.0022348828715621494, + "clip_ratio/high_mean": 0.0009596692616469227, + "clip_ratio/low_mean": 0.0006714849441777915, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016311542167386506, + "epoch": 11.485131195335278, + "grad_norm": 0.22986245155334473, + "learning_rate": 1e-06, + "loss": -0.0471, + "step": 1108 + }, + { + "clip_ratio/high_max": 0.0021594388381345198, + "clip_ratio/high_mean": 0.0008019785054784734, + "clip_ratio/low_mean": 0.0007085141814968665, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015104926933418028, + "epoch": 11.494460641399417, + "grad_norm": 0.15406882762908936, + "learning_rate": 1e-06, + "loss": -0.0038, + "step": 1109 + }, + { + "clip_ratio/high_max": 0.002414992093690671, + "clip_ratio/high_mean": 0.0009492818662693026, + "clip_ratio/low_mean": 0.0006458067891799146, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015950886445352808, + "epoch": 11.503790087463557, + "grad_norm": 0.16856832802295685, + "learning_rate": 1e-06, + "loss": -0.0496, + "step": 1110 + }, + { + "clip_ratio/high_max": 0.002206098590249894, + "clip_ratio/high_mean": 0.0007776361526339315, + "clip_ratio/low_mean": 0.0007788886541675311, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015565247886115685, + "epoch": 11.513119533527696, + "grad_norm": 0.1853359490633011, + "learning_rate": 1e-06, + "loss": 0.0125, + "step": 1111 + }, + { + "clip_ratio/high_max": 0.002284768743265886, + "clip_ratio/high_mean": 0.0009445244395465124, + "clip_ratio/low_mean": 0.0008526741839887109, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017971985798794776, + "epoch": 11.522448979591836, + "grad_norm": 0.16563715040683746, + "learning_rate": 1e-06, + "loss": -0.0036, + "step": 1112 + }, + { + "clip_ratio/high_max": 0.0026219166393275373, + "clip_ratio/high_mean": 0.0010779583171824925, + "clip_ratio/low_mean": 0.0007722936206846498, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018502519378671423, + "epoch": 11.531778425655977, + "grad_norm": 0.15771019458770752, + "learning_rate": 1e-06, + "loss": -0.0478, + "step": 1113 + }, + { + "clip_ratio/high_max": 0.00226580751768779, + "clip_ratio/high_mean": 0.0009537130390526727, + "clip_ratio/low_mean": 0.0008044518890528707, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017581649153726175, + "epoch": 11.541107871720117, + "grad_norm": 0.14264440536499023, + "learning_rate": 1e-06, + "loss": -0.0349, + "step": 1114 + }, + { + "clip_ratio/high_max": 0.0023334657889790833, + "clip_ratio/high_mean": 0.0008980763013823889, + "clip_ratio/low_mean": 0.0006712785125273513, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015693548193667084, + "epoch": 11.550437317784256, + "grad_norm": 0.15568388998508453, + "learning_rate": 1e-06, + "loss": -0.0144, + "step": 1115 + }, + { + "clip_ratio/high_max": 0.0024960231021395884, + "clip_ratio/high_mean": 0.001090276135073509, + "clip_ratio/low_mean": 0.0008367642330995295, + "clip_ratio/low_min": 2.8674108762061223e-05, + "clip_ratio/region_mean": 0.0019270403863629326, + "epoch": 11.559766763848396, + "grad_norm": 0.2161525934934616, + "learning_rate": 1e-06, + "loss": -0.0224, + "step": 1116 + }, + { + "clip_ratio/high_max": 0.002084986503177788, + "clip_ratio/high_mean": 0.0008541378192603588, + "clip_ratio/low_mean": 0.0006564938057636027, + "clip_ratio/low_min": 1.7346656022709794e-05, + "clip_ratio/region_mean": 0.001510631660494255, + "epoch": 11.569096209912537, + "grad_norm": 0.14218589663505554, + "learning_rate": 1e-06, + "loss": -0.0287, + "step": 1117 + }, + { + "clip_ratio/high_max": 0.002428565036098007, + "clip_ratio/high_mean": 0.0008952251610025996, + "clip_ratio/low_mean": 0.0007816510260454379, + "clip_ratio/low_min": 8.259017522505019e-05, + "clip_ratio/region_mean": 0.0016768761743151117, + "epoch": 11.578425655976677, + "grad_norm": 0.1381969451904297, + "learning_rate": 1e-06, + "loss": -0.0185, + "step": 1118 + }, + { + "clip_ratio/high_max": 0.0025781829353945795, + "clip_ratio/high_mean": 0.001114851766033098, + "clip_ratio/low_mean": 0.0008869078947100206, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020017596398247406, + "epoch": 11.587755102040816, + "grad_norm": 0.14114214479923248, + "learning_rate": 1e-06, + "loss": -0.0125, + "step": 1119 + }, + { + "clip_ratio/high_max": 0.00310697757959133, + "clip_ratio/high_mean": 0.0012362512752588373, + "clip_ratio/low_mean": 0.0008617528910690453, + "clip_ratio/low_min": 4.026196984341368e-05, + "clip_ratio/region_mean": 0.002098004195431713, + "epoch": 11.597084548104956, + "grad_norm": 0.14526280760765076, + "learning_rate": 1e-06, + "loss": -0.0502, + "step": 1120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.082763671875, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4056.0, + "completions/mean_length": 828.3167114257812, + "completions/mean_terminated_length": 533.468505859375, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 11.606413994169095, + "grad_norm": 0.1565808355808258, + "learning_rate": 1e-06, + "loss": -0.0216, + "num_tokens": 646304013.0, + "reward": 0.6595633625984192, + "reward_std": 0.1461329460144043, + "rewards/simpleverify_reward/mean": 0.6595633625984192, + "rewards/simpleverify_reward/std": 0.47386428713798523, + "step": 1121 + }, + { + "clip_ratio/high_max": 0.0017801259782572743, + "clip_ratio/high_mean": 0.0006902775126036431, + "clip_ratio/low_mean": 0.0003831740370969783, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010734515453805216, + "epoch": 11.615743440233237, + "grad_norm": 0.16050131618976593, + "learning_rate": 1e-06, + "loss": -0.0236, + "step": 1122 + }, + { + "clip_ratio/high_max": 0.002041045550868148, + "clip_ratio/high_mean": 0.0007911560469437973, + "clip_ratio/low_mean": 0.00032368482652600505, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011148408520966768, + "epoch": 11.625072886297376, + "grad_norm": 0.13591721653938293, + "learning_rate": 1e-06, + "loss": -0.049, + "step": 1123 + }, + { + "clip_ratio/high_max": 0.0019790680162259378, + "clip_ratio/high_mean": 0.0006888173520565033, + "clip_ratio/low_mean": 0.00039165548423625296, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010804728335642722, + "epoch": 11.634402332361516, + "grad_norm": 0.1681247055530548, + "learning_rate": 1e-06, + "loss": 0.0051, + "step": 1124 + }, + { + "clip_ratio/high_max": 0.0018455690842529293, + "clip_ratio/high_mean": 0.0006823032963438891, + "clip_ratio/low_mean": 0.0005648048909279169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012471081790863536, + "epoch": 11.643731778425655, + "grad_norm": 0.13590890169143677, + "learning_rate": 1e-06, + "loss": -0.0385, + "step": 1125 + }, + { + "clip_ratio/high_max": 0.0015292885655071586, + "clip_ratio/high_mean": 0.0006225187535164878, + "clip_ratio/low_mean": 0.00045897044947196264, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010814892229973339, + "epoch": 11.653061224489797, + "grad_norm": 0.22801119089126587, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 1126 + }, + { + "clip_ratio/high_max": 0.0018514734074415173, + "clip_ratio/high_mean": 0.0007027422670944361, + "clip_ratio/low_mean": 0.0005222668733040337, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012250091349415015, + "epoch": 11.662390670553936, + "grad_norm": 0.19190406799316406, + "learning_rate": 1e-06, + "loss": -0.0325, + "step": 1127 + }, + { + "clip_ratio/high_max": 0.002189035360061098, + "clip_ratio/high_mean": 0.0007538662812294206, + "clip_ratio/low_mean": 0.0005416949161372031, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012955612219229806, + "epoch": 11.671720116618076, + "grad_norm": 0.1414567530155182, + "learning_rate": 1e-06, + "loss": -0.0404, + "step": 1128 + }, + { + "clip_ratio/high_max": 0.0018500980258977506, + "clip_ratio/high_mean": 0.0008095502416836098, + "clip_ratio/low_mean": 0.0006172982866701204, + "clip_ratio/low_min": 1.4266149264585692e-05, + "clip_ratio/region_mean": 0.0014268485283537302, + "epoch": 11.681049562682215, + "grad_norm": 0.15000969171524048, + "learning_rate": 1e-06, + "loss": -0.022, + "step": 1129 + }, + { + "clip_ratio/high_max": 0.001987406205444131, + "clip_ratio/high_mean": 0.0008222470642067492, + "clip_ratio/low_mean": 0.000526983537383785, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013492306097759865, + "epoch": 11.690379008746355, + "grad_norm": 0.13672062754631042, + "learning_rate": 1e-06, + "loss": -0.0539, + "step": 1130 + }, + { + "clip_ratio/high_max": 0.0023744198042550124, + "clip_ratio/high_mean": 0.000983795503998408, + "clip_ratio/low_mean": 0.0005859847988176625, + "clip_ratio/low_min": 7.875242954469286e-05, + "clip_ratio/region_mean": 0.0015697802664362825, + "epoch": 11.699708454810496, + "grad_norm": 0.14747218787670135, + "learning_rate": 1e-06, + "loss": -0.0589, + "step": 1131 + }, + { + "clip_ratio/high_max": 0.0022438299274654128, + "clip_ratio/high_mean": 0.0008859738409228157, + "clip_ratio/low_mean": 0.000632581080935779, + "clip_ratio/low_min": 1.681011235632468e-05, + "clip_ratio/region_mean": 0.001518554927315563, + "epoch": 11.709037900874636, + "grad_norm": 0.16229085624217987, + "learning_rate": 1e-06, + "loss": -0.017, + "step": 1132 + }, + { + "clip_ratio/high_max": 0.0023319523097597994, + "clip_ratio/high_mean": 0.0008855273881636094, + "clip_ratio/low_mean": 0.0007731926016276702, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001658720000705216, + "epoch": 11.718367346938775, + "grad_norm": 0.15086589753627777, + "learning_rate": 1e-06, + "loss": 0.0011, + "step": 1133 + }, + { + "clip_ratio/high_max": 0.0021720065706176683, + "clip_ratio/high_mean": 0.0008946373563958332, + "clip_ratio/low_mean": 0.0007507508071284974, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016453881762572564, + "epoch": 11.727696793002915, + "grad_norm": 0.14836829900741577, + "learning_rate": 1e-06, + "loss": -0.0335, + "step": 1134 + }, + { + "clip_ratio/high_max": 0.002065588501864113, + "clip_ratio/high_mean": 0.0008268410601885989, + "clip_ratio/low_mean": 0.0007444371713063447, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001571278175106272, + "epoch": 11.737026239067056, + "grad_norm": 0.14221884310245514, + "learning_rate": 1e-06, + "loss": -0.0492, + "step": 1135 + }, + { + "clip_ratio/high_max": 0.0023058720107655972, + "clip_ratio/high_mean": 0.0008397777110076277, + "clip_ratio/low_mean": 0.000686158112330304, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015259358442563098, + "epoch": 11.746355685131196, + "grad_norm": 0.153856560587883, + "learning_rate": 1e-06, + "loss": -0.0087, + "step": 1136 + }, + { + "clip_ratio/high_max": 0.002365320349781541, + "clip_ratio/high_mean": 0.0009605860432202462, + "clip_ratio/low_mean": 0.0005241689832473639, + "clip_ratio/low_min": 1.842027631937526e-05, + "clip_ratio/region_mean": 0.001484755066485377, + "epoch": 11.755685131195335, + "grad_norm": 0.15080800652503967, + "learning_rate": 1e-06, + "loss": -0.0715, + "step": 1137 + }, + { + "clip_ratio/high_max": 0.0025613146572140977, + "clip_ratio/high_mean": 0.0009624600697861752, + "clip_ratio/low_mean": 0.0006789249946450582, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016413850753451698, + "epoch": 11.765014577259475, + "grad_norm": 0.1395125538110733, + "learning_rate": 1e-06, + "loss": -0.0699, + "step": 1138 + }, + { + "clip_ratio/high_max": 0.0024246389730251394, + "clip_ratio/high_mean": 0.0008917832983570406, + "clip_ratio/low_mean": 0.0009530246443318902, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018448079281370156, + "epoch": 11.774344023323614, + "grad_norm": 0.17317137122154236, + "learning_rate": 1e-06, + "loss": -0.0064, + "step": 1139 + }, + { + "clip_ratio/high_max": 0.002195367793319747, + "clip_ratio/high_mean": 0.0009457959349674638, + "clip_ratio/low_mean": 0.0007271658605532139, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016729618364479393, + "epoch": 11.783673469387756, + "grad_norm": 0.1614680141210556, + "learning_rate": 1e-06, + "loss": -0.0669, + "step": 1140 + }, + { + "clip_ratio/high_max": 0.0023895534322946332, + "clip_ratio/high_mean": 0.0009715906599012669, + "clip_ratio/low_mean": 0.0007251465067383833, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016967371921055019, + "epoch": 11.793002915451895, + "grad_norm": 0.13667620718479156, + "learning_rate": 1e-06, + "loss": -0.0345, + "step": 1141 + }, + { + "clip_ratio/high_max": 0.002259471220895648, + "clip_ratio/high_mean": 0.0009282710616389522, + "clip_ratio/low_mean": 0.0007498116001443123, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001678082684520632, + "epoch": 11.802332361516035, + "grad_norm": 0.264766126871109, + "learning_rate": 1e-06, + "loss": -0.0423, + "step": 1142 + }, + { + "clip_ratio/high_max": 0.0025943254731828347, + "clip_ratio/high_mean": 0.0009999378216889454, + "clip_ratio/low_mean": 0.0006869605222163955, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001686898362095235, + "epoch": 11.811661807580174, + "grad_norm": 0.15261907875537872, + "learning_rate": 1e-06, + "loss": -0.0198, + "step": 1143 + }, + { + "clip_ratio/high_max": 0.0025551571816322394, + "clip_ratio/high_mean": 0.001012136519420892, + "clip_ratio/low_mean": 0.0007965297954797279, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001808666311262641, + "epoch": 11.820991253644316, + "grad_norm": 0.14053599536418915, + "learning_rate": 1e-06, + "loss": -0.0309, + "step": 1144 + }, + { + "clip_ratio/high_max": 0.002615540954138851, + "clip_ratio/high_mean": 0.0012030341440549819, + "clip_ratio/low_mean": 0.0006149866539999493, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018180207880504895, + "epoch": 11.830320699708455, + "grad_norm": 0.12848827242851257, + "learning_rate": 1e-06, + "loss": -0.0709, + "step": 1145 + }, + { + "clip_ratio/high_max": 0.0027119321821373887, + "clip_ratio/high_mean": 0.0010350280590500915, + "clip_ratio/low_mean": 0.0008117324632621603, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018467605113983154, + "epoch": 11.839650145772595, + "grad_norm": 0.1603071093559265, + "learning_rate": 1e-06, + "loss": -0.0536, + "step": 1146 + }, + { + "clip_ratio/high_max": 0.002456844762491528, + "clip_ratio/high_mean": 0.001064599880919559, + "clip_ratio/low_mean": 0.0007531777409894858, + "clip_ratio/low_min": 3.7324574805097654e-05, + "clip_ratio/region_mean": 0.0018177776291850023, + "epoch": 11.848979591836734, + "grad_norm": 0.16494296491146088, + "learning_rate": 1e-06, + "loss": -0.0546, + "step": 1147 + }, + { + "clip_ratio/high_max": 0.002657020988408476, + "clip_ratio/high_mean": 0.0008777734765317291, + "clip_ratio/low_mean": 0.0008918813109630719, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001769654823874589, + "epoch": 11.858309037900874, + "grad_norm": 0.14670419692993164, + "learning_rate": 1e-06, + "loss": -0.0113, + "step": 1148 + }, + { + "clip_ratio/high_max": 0.002420163080387283, + "clip_ratio/high_mean": 0.0010264310185448267, + "clip_ratio/low_mean": 0.0007823877294867998, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018088187789544463, + "epoch": 11.867638483965015, + "grad_norm": 0.18839552998542786, + "learning_rate": 1e-06, + "loss": -0.0494, + "step": 1149 + }, + { + "clip_ratio/high_max": 0.0022179387451615185, + "clip_ratio/high_mean": 0.0009804271467146464, + "clip_ratio/low_mean": 0.0006909294452270842, + "clip_ratio/low_min": 4.6728971938136965e-05, + "clip_ratio/region_mean": 0.0016713566074031405, + "epoch": 11.876967930029155, + "grad_norm": 0.15449899435043335, + "learning_rate": 1e-06, + "loss": -0.0545, + "step": 1150 + }, + { + "clip_ratio/high_max": 0.002441216573060956, + "clip_ratio/high_mean": 0.0009098972404899541, + "clip_ratio/low_mean": 0.0007784011968396953, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016882984309631865, + "epoch": 11.886297376093294, + "grad_norm": 0.15630044043064117, + "learning_rate": 1e-06, + "loss": -0.0353, + "step": 1151 + }, + { + "clip_ratio/high_max": 0.00278202784829773, + "clip_ratio/high_mean": 0.0010964530520141125, + "clip_ratio/low_mean": 0.0007686192102482892, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018650722195161507, + "epoch": 11.895626822157434, + "grad_norm": 0.13340958952903748, + "learning_rate": 1e-06, + "loss": -0.0368, + "step": 1152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0818568638392857, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4024.0, + "completions/mean_length": 820.6602172851562, + "completions/mean_terminated_length": 528.64794921875, + "completions/min_length": 104.0, + "completions/min_terminated_length": 104.0, + "epoch": 12.00932944606414, + "grad_norm": 0.17377983033657074, + "learning_rate": 1e-06, + "loss": -0.0428, + "num_tokens": 663036318.0, + "reward": 0.6762346625328064, + "reward_std": 0.1436760425567627, + "rewards/simpleverify_reward/mean": 0.6762346625328064, + "rewards/simpleverify_reward/std": 0.46791985630989075, + "step": 1153 + }, + { + "clip_ratio/high_max": 0.001661277285165852, + "clip_ratio/high_mean": 0.0006638219110755017, + "clip_ratio/low_mean": 0.0003342748977956944, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009980968297895743, + "epoch": 12.018658892128279, + "grad_norm": 0.16826342046260834, + "learning_rate": 1e-06, + "loss": -0.0394, + "step": 1154 + }, + { + "clip_ratio/high_max": 0.0015568419294140767, + "clip_ratio/high_mean": 0.0006034252019162523, + "clip_ratio/low_mean": 0.00037461021292983787, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.000978035403022659, + "epoch": 12.02798833819242, + "grad_norm": 0.16304998099803925, + "learning_rate": 1e-06, + "loss": -0.0207, + "step": 1155 + }, + { + "clip_ratio/high_max": 0.0016580984483880457, + "clip_ratio/high_mean": 0.0005983781202303362, + "clip_ratio/low_mean": 0.000489492094857269, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010878702087211423, + "epoch": 12.03731778425656, + "grad_norm": 0.1583404690027237, + "learning_rate": 1e-06, + "loss": -0.0031, + "step": 1156 + }, + { + "clip_ratio/high_max": 0.002228031638878747, + "clip_ratio/high_mean": 0.0007559251525890431, + "clip_ratio/low_mean": 0.000435610113072471, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011915352697542403, + "epoch": 12.0466472303207, + "grad_norm": 0.14719834923744202, + "learning_rate": 1e-06, + "loss": -0.0341, + "step": 1157 + }, + { + "clip_ratio/high_max": 0.0021915329198236577, + "clip_ratio/high_mean": 0.0008532390220352681, + "clip_ratio/low_mean": 0.00038506327337017865, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012383022949506994, + "epoch": 12.055976676384839, + "grad_norm": 0.14217408001422882, + "learning_rate": 1e-06, + "loss": -0.0671, + "step": 1158 + }, + { + "clip_ratio/high_max": 0.0021985833955113776, + "clip_ratio/high_mean": 0.0007887626852607355, + "clip_ratio/low_mean": 0.0004897638227703283, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012785264952981379, + "epoch": 12.06530612244898, + "grad_norm": 0.15787319839000702, + "learning_rate": 1e-06, + "loss": -0.0307, + "step": 1159 + }, + { + "clip_ratio/high_max": 0.0020062327530467883, + "clip_ratio/high_mean": 0.000756065099267289, + "clip_ratio/low_mean": 0.0005255451924313093, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012816103153454605, + "epoch": 12.07463556851312, + "grad_norm": 0.12930822372436523, + "learning_rate": 1e-06, + "loss": -0.0063, + "step": 1160 + }, + { + "clip_ratio/high_max": 0.0022514497832162306, + "clip_ratio/high_mean": 0.0009500038177066017, + "clip_ratio/low_mean": 0.0006511448782475782, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016011486950446852, + "epoch": 12.08396501457726, + "grad_norm": 0.16800308227539062, + "learning_rate": 1e-06, + "loss": -0.0156, + "step": 1161 + }, + { + "clip_ratio/high_max": 0.002100137160596205, + "clip_ratio/high_mean": 0.0007769330804876518, + "clip_ratio/low_mean": 0.0006053432002772752, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013822762921336107, + "epoch": 12.093294460641399, + "grad_norm": 0.1570649892091751, + "learning_rate": 1e-06, + "loss": -0.0555, + "step": 1162 + }, + { + "clip_ratio/high_max": 0.0023757684321026318, + "clip_ratio/high_mean": 0.0009196359078487149, + "clip_ratio/low_mean": 0.0005699716239178088, + "clip_ratio/low_min": 1.540927041787654e-05, + "clip_ratio/region_mean": 0.001489607551775407, + "epoch": 12.102623906705539, + "grad_norm": 0.17271654307842255, + "learning_rate": 1e-06, + "loss": -0.0836, + "step": 1163 + }, + { + "clip_ratio/high_max": 0.002208869773312472, + "clip_ratio/high_mean": 0.0009188851108774543, + "clip_ratio/low_mean": 0.000640220639979816, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001559105785418069, + "epoch": 12.11195335276968, + "grad_norm": 0.1625974178314209, + "learning_rate": 1e-06, + "loss": -0.0492, + "step": 1164 + }, + { + "clip_ratio/high_max": 0.0024372977586608613, + "clip_ratio/high_mean": 0.0008482209918838635, + "clip_ratio/low_mean": 0.000770468699556659, + "clip_ratio/low_min": 5.015483475290239e-05, + "clip_ratio/region_mean": 0.0016186896900762804, + "epoch": 12.12128279883382, + "grad_norm": 0.17503564059734344, + "learning_rate": 1e-06, + "loss": -0.0359, + "step": 1165 + }, + { + "clip_ratio/high_max": 0.0022809507172496524, + "clip_ratio/high_mean": 0.0008475133727188222, + "clip_ratio/low_mean": 0.0006917627615621313, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015392761379189324, + "epoch": 12.130612244897959, + "grad_norm": 0.5186414122581482, + "learning_rate": 1e-06, + "loss": -0.0343, + "step": 1166 + }, + { + "clip_ratio/high_max": 0.0028250148170627654, + "clip_ratio/high_mean": 0.0010407908230263274, + "clip_ratio/low_mean": 0.0008246911302194349, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001865481972345151, + "epoch": 12.139941690962099, + "grad_norm": 0.15463808178901672, + "learning_rate": 1e-06, + "loss": -0.0562, + "step": 1167 + }, + { + "clip_ratio/high_max": 0.002911760821007192, + "clip_ratio/high_mean": 0.001097201085940469, + "clip_ratio/low_mean": 0.0005832727783854352, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016804738334030844, + "epoch": 12.14927113702624, + "grad_norm": 0.1505255252122879, + "learning_rate": 1e-06, + "loss": -0.0431, + "step": 1168 + }, + { + "clip_ratio/high_max": 0.0018844895166694187, + "clip_ratio/high_mean": 0.0007062927579681855, + "clip_ratio/low_mean": 0.0006535016027555685, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013597943834611215, + "epoch": 12.15860058309038, + "grad_norm": 0.14496773481369019, + "learning_rate": 1e-06, + "loss": -0.0017, + "step": 1169 + }, + { + "clip_ratio/high_max": 0.002417742849502247, + "clip_ratio/high_mean": 0.0009744290146045387, + "clip_ratio/low_mean": 0.0007717087100900244, + "clip_ratio/low_min": 9.492709978076164e-06, + "clip_ratio/region_mean": 0.0017461377283325419, + "epoch": 12.167930029154519, + "grad_norm": 0.15995532274246216, + "learning_rate": 1e-06, + "loss": -0.0366, + "step": 1170 + }, + { + "clip_ratio/high_max": 0.0022656575602013618, + "clip_ratio/high_mean": 0.0009435345400561346, + "clip_ratio/low_mean": 0.0006132847665867303, + "clip_ratio/low_min": 2.340385617571883e-05, + "clip_ratio/region_mean": 0.0015568192975479178, + "epoch": 12.177259475218658, + "grad_norm": 0.13755114376544952, + "learning_rate": 1e-06, + "loss": -0.0658, + "step": 1171 + }, + { + "clip_ratio/high_max": 0.0025760534699657, + "clip_ratio/high_mean": 0.001054451164236525, + "clip_ratio/low_mean": 0.0006196158155944431, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016740669816499576, + "epoch": 12.186588921282798, + "grad_norm": 0.1431213766336441, + "learning_rate": 1e-06, + "loss": -0.0631, + "step": 1172 + }, + { + "clip_ratio/high_max": 0.001973423048184486, + "clip_ratio/high_mean": 0.000847454575705342, + "clip_ratio/low_mean": 0.0008472516219626414, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016947062213148456, + "epoch": 12.19591836734694, + "grad_norm": 0.16347478330135345, + "learning_rate": 1e-06, + "loss": -0.0129, + "step": 1173 + }, + { + "clip_ratio/high_max": 0.002401221830950817, + "clip_ratio/high_mean": 0.0009602002337487647, + "clip_ratio/low_mean": 0.0008040178308874601, + "clip_ratio/low_min": 6.460615986725315e-05, + "clip_ratio/region_mean": 0.001764218024618458, + "epoch": 12.205247813411079, + "grad_norm": 0.15968972444534302, + "learning_rate": 1e-06, + "loss": -0.0076, + "step": 1174 + }, + { + "clip_ratio/high_max": 0.0023099683894542977, + "clip_ratio/high_mean": 0.001005184072710108, + "clip_ratio/low_mean": 0.000844478718136088, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001849662781751249, + "epoch": 12.214577259475218, + "grad_norm": 0.17376315593719482, + "learning_rate": 1e-06, + "loss": 0.0113, + "step": 1175 + }, + { + "clip_ratio/high_max": 0.0026273516996297985, + "clip_ratio/high_mean": 0.0010249346269119997, + "clip_ratio/low_mean": 0.0006908012546773534, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017157358888653107, + "epoch": 12.223906705539358, + "grad_norm": 0.1493109166622162, + "learning_rate": 1e-06, + "loss": -0.0525, + "step": 1176 + }, + { + "clip_ratio/high_max": 0.002960449979582336, + "clip_ratio/high_mean": 0.0012713336218439508, + "clip_ratio/low_mean": 0.0007620664873684291, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020334001383162104, + "epoch": 12.2332361516035, + "grad_norm": 0.1553051620721817, + "learning_rate": 1e-06, + "loss": -0.0428, + "step": 1177 + }, + { + "clip_ratio/high_max": 0.0022085066048020963, + "clip_ratio/high_mean": 0.0009750107274157926, + "clip_ratio/low_mean": 0.0006929903797754378, + "clip_ratio/low_min": 1.6988311472232454e-05, + "clip_ratio/region_mean": 0.0016680011394782923, + "epoch": 12.242565597667639, + "grad_norm": 0.46785208582878113, + "learning_rate": 1e-06, + "loss": -0.0545, + "step": 1178 + }, + { + "clip_ratio/high_max": 0.0026444776012795046, + "clip_ratio/high_mean": 0.001111933815991506, + "clip_ratio/low_mean": 0.0006248688005143777, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001736802645609714, + "epoch": 12.251895043731778, + "grad_norm": 0.17787323892116547, + "learning_rate": 1e-06, + "loss": -0.0546, + "step": 1179 + }, + { + "clip_ratio/high_max": 0.00251927028875798, + "clip_ratio/high_mean": 0.0011117884787381627, + "clip_ratio/low_mean": 0.0006955171284062089, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001807305590773467, + "epoch": 12.261224489795918, + "grad_norm": 0.13165973126888275, + "learning_rate": 1e-06, + "loss": -0.0375, + "step": 1180 + }, + { + "clip_ratio/high_max": 0.0024573969349148683, + "clip_ratio/high_mean": 0.0010340923709009076, + "clip_ratio/low_mean": 0.000628108978162345, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016622013572487049, + "epoch": 12.270553935860057, + "grad_norm": 0.12693646550178528, + "learning_rate": 1e-06, + "loss": -0.0748, + "step": 1181 + }, + { + "clip_ratio/high_max": 0.002866101589461323, + "clip_ratio/high_mean": 0.0011937303715967573, + "clip_ratio/low_mean": 0.0007009933542576618, + "clip_ratio/low_min": 1.3261192179925274e-05, + "clip_ratio/region_mean": 0.0018947236749227159, + "epoch": 12.279883381924199, + "grad_norm": 0.14077836275100708, + "learning_rate": 1e-06, + "loss": -0.0689, + "step": 1182 + }, + { + "clip_ratio/high_max": 0.0024632771528558806, + "clip_ratio/high_mean": 0.0010278113368258346, + "clip_ratio/low_mean": 0.0008552577764930902, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018830691114999354, + "epoch": 12.289212827988338, + "grad_norm": 0.14951001107692719, + "learning_rate": 1e-06, + "loss": -0.0035, + "step": 1183 + }, + { + "clip_ratio/high_max": 0.002585261157946661, + "clip_ratio/high_mean": 0.0009649988878663862, + "clip_ratio/low_mean": 0.0009446480489714304, + "clip_ratio/low_min": 4.653760333894752e-05, + "clip_ratio/region_mean": 0.0019096469259238802, + "epoch": 12.298542274052478, + "grad_norm": 0.15220119059085846, + "learning_rate": 1e-06, + "loss": 0.0197, + "step": 1184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0818219866071429, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4070.0, + "completions/mean_length": 829.8917846679688, + "completions/mean_terminated_length": 538.837646484375, + "completions/min_length": 98.0, + "completions/min_terminated_length": 98.0, + "epoch": 12.307871720116617, + "grad_norm": 0.14995774626731873, + "learning_rate": 1e-06, + "loss": -0.0209, + "num_tokens": 680086743.0, + "reward": 0.665597140789032, + "reward_std": 0.14333374798297882, + "rewards/simpleverify_reward/mean": 0.6655970811843872, + "rewards/simpleverify_reward/std": 0.47178953886032104, + "step": 1185 + }, + { + "clip_ratio/high_max": 0.0015716331981820986, + "clip_ratio/high_mean": 0.0006381597031577257, + "clip_ratio/low_mean": 0.0002720080365179456, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009101677278522402, + "epoch": 12.317201166180759, + "grad_norm": 0.2688741683959961, + "learning_rate": 1e-06, + "loss": -0.0211, + "step": 1186 + }, + { + "clip_ratio/high_max": 0.0018787017543218099, + "clip_ratio/high_mean": 0.0006574818698936724, + "clip_ratio/low_mean": 0.0003735257723747054, + "clip_ratio/low_min": 2.355823562538717e-05, + "clip_ratio/region_mean": 0.0010310076304449467, + "epoch": 12.326530612244898, + "grad_norm": 0.15530653297901154, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 1187 + }, + { + "clip_ratio/high_max": 0.0018491997216187883, + "clip_ratio/high_mean": 0.0006808987827753299, + "clip_ratio/low_mean": 0.00038289290523607633, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010637916948326165, + "epoch": 12.335860058309038, + "grad_norm": 0.16759537160396576, + "learning_rate": 1e-06, + "loss": -0.0273, + "step": 1188 + }, + { + "clip_ratio/high_max": 0.0017889955852297135, + "clip_ratio/high_mean": 0.0006620636977459071, + "clip_ratio/low_mean": 0.0004419795286594308, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011040432254958432, + "epoch": 12.345189504373177, + "grad_norm": 0.28052854537963867, + "learning_rate": 1e-06, + "loss": -0.0422, + "step": 1189 + }, + { + "clip_ratio/high_max": 0.0015503696704399772, + "clip_ratio/high_mean": 0.0006633316825173097, + "clip_ratio/low_mean": 0.0004858939437326626, + "clip_ratio/low_min": 2.6003745006164536e-05, + "clip_ratio/region_mean": 0.001149225608969573, + "epoch": 12.354518950437317, + "grad_norm": 0.12861286103725433, + "learning_rate": 1e-06, + "loss": -0.0341, + "step": 1190 + }, + { + "clip_ratio/high_max": 0.0019206314973416738, + "clip_ratio/high_mean": 0.0007159586466514156, + "clip_ratio/low_mean": 0.0006350382118398556, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001350996859400766, + "epoch": 12.363848396501458, + "grad_norm": 0.16097664833068848, + "learning_rate": 1e-06, + "loss": -0.0219, + "step": 1191 + }, + { + "clip_ratio/high_max": 0.0021182185373618267, + "clip_ratio/high_mean": 0.0008341489028680371, + "clip_ratio/low_mean": 0.0005425302297226153, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013766791016678326, + "epoch": 12.373177842565598, + "grad_norm": 0.16182248294353485, + "learning_rate": 1e-06, + "loss": -0.0193, + "step": 1192 + }, + { + "clip_ratio/high_max": 0.0021218025794951245, + "clip_ratio/high_mean": 0.0008499425330228405, + "clip_ratio/low_mean": 0.0005363503687476623, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001386292922688881, + "epoch": 12.382507288629737, + "grad_norm": 2.356157064437866, + "learning_rate": 1e-06, + "loss": -0.0614, + "step": 1193 + }, + { + "clip_ratio/high_max": 0.002036281057371525, + "clip_ratio/high_mean": 0.0007653305492567597, + "clip_ratio/low_mean": 0.0006597809488084749, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014251114953367505, + "epoch": 12.391836734693877, + "grad_norm": 0.15206362307071686, + "learning_rate": 1e-06, + "loss": -0.0191, + "step": 1194 + }, + { + "clip_ratio/high_max": 0.0015204007358988747, + "clip_ratio/high_mean": 0.0006446774850701331, + "clip_ratio/low_mean": 0.0005821640047543042, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012268414902791847, + "epoch": 12.401166180758018, + "grad_norm": 0.13985669612884521, + "learning_rate": 1e-06, + "loss": -0.0132, + "step": 1195 + }, + { + "clip_ratio/high_max": 0.0018950283629237674, + "clip_ratio/high_mean": 0.0007218277751235291, + "clip_ratio/low_mean": 0.0007843420871722628, + "clip_ratio/low_min": 4.07298794016242e-05, + "clip_ratio/region_mean": 0.0015061698541103397, + "epoch": 12.410495626822158, + "grad_norm": 111.48577880859375, + "learning_rate": 1e-06, + "loss": 0.1556, + "step": 1196 + }, + { + "clip_ratio/high_max": 0.0020736753212986514, + "clip_ratio/high_mean": 0.0008079645431280369, + "clip_ratio/low_mean": 0.000687326496517926, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014952910496504046, + "epoch": 12.419825072886297, + "grad_norm": 0.2254544049501419, + "learning_rate": 1e-06, + "loss": -0.026, + "step": 1197 + }, + { + "clip_ratio/high_max": 0.0022566876796190627, + "clip_ratio/high_mean": 0.0009089765117096249, + "clip_ratio/low_mean": 0.0008627541565147112, + "clip_ratio/low_min": 1.8195050870417617e-05, + "clip_ratio/region_mean": 0.001771730683685746, + "epoch": 12.429154518950437, + "grad_norm": 0.17400749027729034, + "learning_rate": 1e-06, + "loss": -0.0263, + "step": 1198 + }, + { + "clip_ratio/high_max": 0.0023348169997916557, + "clip_ratio/high_mean": 0.0010773108697321732, + "clip_ratio/low_mean": 0.0008090580035968742, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018863688965211622, + "epoch": 12.438483965014576, + "grad_norm": 0.1585964560508728, + "learning_rate": 1e-06, + "loss": -0.0759, + "step": 1199 + }, + { + "clip_ratio/high_max": 0.0022800183614890557, + "clip_ratio/high_mean": 0.0010704686665121699, + "clip_ratio/low_mean": 0.0008826940847939113, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019531627694959752, + "epoch": 12.447813411078718, + "grad_norm": 0.14367038011550903, + "learning_rate": 1e-06, + "loss": -0.0437, + "step": 1200 + }, + { + "clip_ratio/high_max": 0.002560265827924013, + "clip_ratio/high_mean": 0.0010434905234433245, + "clip_ratio/low_mean": 0.0007443507874995703, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001787841334589757, + "epoch": 12.457142857142857, + "grad_norm": 0.16841132938861847, + "learning_rate": 1e-06, + "loss": -0.0625, + "step": 1201 + }, + { + "clip_ratio/high_max": 0.002361693448619917, + "clip_ratio/high_mean": 0.0010176355517614866, + "clip_ratio/low_mean": 0.0008699332593096187, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018875688620028086, + "epoch": 12.466472303206997, + "grad_norm": 0.16177020967006683, + "learning_rate": 1e-06, + "loss": -0.0414, + "step": 1202 + }, + { + "clip_ratio/high_max": 0.002028581624472281, + "clip_ratio/high_mean": 0.0007894065965956543, + "clip_ratio/low_mean": 0.0008450373916275566, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016344439718523063, + "epoch": 12.475801749271136, + "grad_norm": 0.1632426232099533, + "learning_rate": 1e-06, + "loss": 0.0107, + "step": 1203 + }, + { + "clip_ratio/high_max": 0.0023649368304177187, + "clip_ratio/high_mean": 0.0009850279566308018, + "clip_ratio/low_mean": 0.0009320356111857109, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019170635860064067, + "epoch": 12.485131195335278, + "grad_norm": 0.2862667441368103, + "learning_rate": 1e-06, + "loss": -0.0131, + "step": 1204 + }, + { + "clip_ratio/high_max": 0.0025797787675401196, + "clip_ratio/high_mean": 0.0010242903772450518, + "clip_ratio/low_mean": 0.0009106253055506386, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019349157155374996, + "epoch": 12.494460641399417, + "grad_norm": 0.142133429646492, + "learning_rate": 1e-06, + "loss": -0.0779, + "step": 1205 + }, + { + "clip_ratio/high_max": 0.0021059031423646957, + "clip_ratio/high_mean": 0.0008474146397929871, + "clip_ratio/low_mean": 0.0008995021053124219, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017469167687522713, + "epoch": 12.503790087463557, + "grad_norm": 0.12770113348960876, + "learning_rate": 1e-06, + "loss": -0.0133, + "step": 1206 + }, + { + "clip_ratio/high_max": 0.0025423521728953347, + "clip_ratio/high_mean": 0.0010155218787986087, + "clip_ratio/low_mean": 0.0009215239351760829, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019370458103367127, + "epoch": 12.513119533527696, + "grad_norm": 0.16851669549942017, + "learning_rate": 1e-06, + "loss": -0.0235, + "step": 1207 + }, + { + "clip_ratio/high_max": 0.002701407494896557, + "clip_ratio/high_mean": 0.0010981410996464547, + "clip_ratio/low_mean": 0.0008389247943796363, + "clip_ratio/low_min": 3.055487832170911e-05, + "clip_ratio/region_mean": 0.0019370659210835584, + "epoch": 12.522448979591836, + "grad_norm": 0.14179156720638275, + "learning_rate": 1e-06, + "loss": -0.0639, + "step": 1208 + }, + { + "clip_ratio/high_max": 0.0026506004578550346, + "clip_ratio/high_mean": 0.0009729711891850457, + "clip_ratio/low_mean": 0.0008822552590572741, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018552264373283833, + "epoch": 12.531778425655977, + "grad_norm": 0.1276615858078003, + "learning_rate": 1e-06, + "loss": -0.036, + "step": 1209 + }, + { + "clip_ratio/high_max": 0.0028841284147347324, + "clip_ratio/high_mean": 0.0011803064953710418, + "clip_ratio/low_mean": 0.001103073386275355, + "clip_ratio/low_min": 0.00011602670292631956, + "clip_ratio/region_mean": 0.00228337987937266, + "epoch": 12.541107871720117, + "grad_norm": 0.1623101532459259, + "learning_rate": 1e-06, + "loss": -0.0556, + "step": 1210 + }, + { + "clip_ratio/high_max": 0.003024261233804282, + "clip_ratio/high_mean": 0.0010838951675395947, + "clip_ratio/low_mean": 0.0008472310910292435, + "clip_ratio/low_min": 1.6792047972558066e-05, + "clip_ratio/region_mean": 0.0019311262149130926, + "epoch": 12.550437317784256, + "grad_norm": 0.14724132418632507, + "learning_rate": 1e-06, + "loss": -0.0407, + "step": 1211 + }, + { + "clip_ratio/high_max": 0.002793861669488251, + "clip_ratio/high_mean": 0.001170790350442985, + "clip_ratio/low_mean": 0.0009176432140520774, + "clip_ratio/low_min": 2.0424837202881463e-05, + "clip_ratio/region_mean": 0.002088433553581126, + "epoch": 12.559766763848396, + "grad_norm": 0.13823272287845612, + "learning_rate": 1e-06, + "loss": -0.0507, + "step": 1212 + }, + { + "clip_ratio/high_max": 0.0022934300650376827, + "clip_ratio/high_mean": 0.0010273688276356552, + "clip_ratio/low_mean": 0.0008572914548494737, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018846603270503692, + "epoch": 12.569096209912537, + "grad_norm": 0.1401015967130661, + "learning_rate": 1e-06, + "loss": -0.0253, + "step": 1213 + }, + { + "clip_ratio/high_max": 0.002863459063519258, + "clip_ratio/high_mean": 0.0011268034413660644, + "clip_ratio/low_mean": 0.0010628795262164203, + "clip_ratio/low_min": 1.4070238648855593e-05, + "clip_ratio/region_mean": 0.002189683000324294, + "epoch": 12.578425655976677, + "grad_norm": 0.15729497373104095, + "learning_rate": 1e-06, + "loss": -0.0453, + "step": 1214 + }, + { + "clip_ratio/high_max": 0.0030355383714777417, + "clip_ratio/high_mean": 0.001130250846472336, + "clip_ratio/low_mean": 0.0009242892556358129, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002054540134849958, + "epoch": 12.587755102040816, + "grad_norm": 0.17753230035305023, + "learning_rate": 1e-06, + "loss": -0.0494, + "step": 1215 + }, + { + "clip_ratio/high_max": 0.002656020864378661, + "clip_ratio/high_mean": 0.0008885436200216645, + "clip_ratio/low_mean": 0.0010008937297243392, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018894373206421733, + "epoch": 12.597084548104956, + "grad_norm": 0.1408652812242508, + "learning_rate": 1e-06, + "loss": -0.0311, + "step": 1216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0905412946428571, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4061.0, + "completions/mean_length": 855.383544921875, + "completions/mean_terminated_length": 532.7634887695312, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "epoch": 12.606413994169095, + "grad_norm": 0.16870707273483276, + "learning_rate": 1e-06, + "loss": -0.0646, + "num_tokens": 696894708.0, + "reward": 0.6483328938484192, + "reward_std": 0.1489199846982956, + "rewards/simpleverify_reward/mean": 0.6483328938484192, + "rewards/simpleverify_reward/std": 0.4774990379810333, + "step": 1217 + }, + { + "clip_ratio/high_max": 0.0017404887694283389, + "clip_ratio/high_mean": 0.0006783103326597484, + "clip_ratio/low_mean": 0.00045006274649495026, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011283730818831827, + "epoch": 12.615743440233237, + "grad_norm": 0.16225512325763702, + "learning_rate": 1e-06, + "loss": -0.0542, + "step": 1218 + }, + { + "clip_ratio/high_max": 0.0022510680719278753, + "clip_ratio/high_mean": 0.0008329924803547328, + "clip_ratio/low_mean": 0.00038368742434613523, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012166798915131949, + "epoch": 12.625072886297376, + "grad_norm": 0.1448415368795395, + "learning_rate": 1e-06, + "loss": -0.0461, + "step": 1219 + }, + { + "clip_ratio/high_max": 0.0021501267547137104, + "clip_ratio/high_mean": 0.0008077281981968554, + "clip_ratio/low_mean": 0.0003889538829753292, + "clip_ratio/low_min": 1.5508685464737937e-05, + "clip_ratio/region_mean": 0.001196682067529764, + "epoch": 12.634402332361516, + "grad_norm": 0.15959766507148743, + "learning_rate": 1e-06, + "loss": -0.0422, + "step": 1220 + }, + { + "clip_ratio/high_max": 0.0018303566830581985, + "clip_ratio/high_mean": 0.0008155402720149141, + "clip_ratio/low_mean": 0.0004060740284330677, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012216143004479818, + "epoch": 12.643731778425655, + "grad_norm": 0.18018299341201782, + "learning_rate": 1e-06, + "loss": -0.06, + "step": 1221 + }, + { + "clip_ratio/high_max": 0.0019239308567193802, + "clip_ratio/high_mean": 0.0007672081537748454, + "clip_ratio/low_mean": 0.0005562840024140314, + "clip_ratio/low_min": 3.0171373509801924e-05, + "clip_ratio/region_mean": 0.0013234921425464563, + "epoch": 12.653061224489797, + "grad_norm": 0.16592402756214142, + "learning_rate": 1e-06, + "loss": -0.0161, + "step": 1222 + }, + { + "clip_ratio/high_max": 0.0021908340640948154, + "clip_ratio/high_mean": 0.0007934398290672107, + "clip_ratio/low_mean": 0.0005543048810068285, + "clip_ratio/low_min": 1.4657598512712866e-05, + "clip_ratio/region_mean": 0.0013477446991601028, + "epoch": 12.662390670553936, + "grad_norm": 0.1567879468202591, + "learning_rate": 1e-06, + "loss": -0.0149, + "step": 1223 + }, + { + "clip_ratio/high_max": 0.001866518781753257, + "clip_ratio/high_mean": 0.0007898500016381149, + "clip_ratio/low_mean": 0.0005112250491947634, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013010750262765214, + "epoch": 12.671720116618076, + "grad_norm": 0.13546861708164215, + "learning_rate": 1e-06, + "loss": -0.0214, + "step": 1224 + }, + { + "clip_ratio/high_max": 0.0021611627216771012, + "clip_ratio/high_mean": 0.0008454649478153442, + "clip_ratio/low_mean": 0.0004844476770813344, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013299125894263852, + "epoch": 12.681049562682215, + "grad_norm": 0.1423157900571823, + "learning_rate": 1e-06, + "loss": -0.0984, + "step": 1225 + }, + { + "clip_ratio/high_max": 0.002142385797924362, + "clip_ratio/high_mean": 0.0008808996535663027, + "clip_ratio/low_mean": 0.0005733223561037448, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014542220123985317, + "epoch": 12.690379008746355, + "grad_norm": 0.13812783360481262, + "learning_rate": 1e-06, + "loss": -0.0585, + "step": 1226 + }, + { + "clip_ratio/high_max": 0.002020554711634759, + "clip_ratio/high_mean": 0.0008732901296752971, + "clip_ratio/low_mean": 0.000523690925547271, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013969810424896423, + "epoch": 12.699708454810496, + "grad_norm": 0.24717958271503448, + "learning_rate": 1e-06, + "loss": -0.0644, + "step": 1227 + }, + { + "clip_ratio/high_max": 0.002180870473239338, + "clip_ratio/high_mean": 0.0008601616191299399, + "clip_ratio/low_mean": 0.0006040267426214996, + "clip_ratio/low_min": 1.4328289580589626e-05, + "clip_ratio/region_mean": 0.0014641883681179024, + "epoch": 12.709037900874636, + "grad_norm": 0.16998404264450073, + "learning_rate": 1e-06, + "loss": -0.0531, + "step": 1228 + }, + { + "clip_ratio/high_max": 0.0024453268451907206, + "clip_ratio/high_mean": 0.0009915249593177577, + "clip_ratio/low_mean": 0.0007085805464157602, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001700105465715751, + "epoch": 12.718367346938775, + "grad_norm": 0.15147055685520172, + "learning_rate": 1e-06, + "loss": -0.0584, + "step": 1229 + }, + { + "clip_ratio/high_max": 0.0021441570315801073, + "clip_ratio/high_mean": 0.0008396260145673295, + "clip_ratio/low_mean": 0.0006074137527321, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014470397654804401, + "epoch": 12.727696793002915, + "grad_norm": 0.6910759210586548, + "learning_rate": 1e-06, + "loss": -0.0544, + "step": 1230 + }, + { + "clip_ratio/high_max": 0.0024313779358635657, + "clip_ratio/high_mean": 0.0009315318420703989, + "clip_ratio/low_mean": 0.0006912571616339847, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016227889827860054, + "epoch": 12.737026239067056, + "grad_norm": 0.20457857847213745, + "learning_rate": 1e-06, + "loss": -0.0413, + "step": 1231 + }, + { + "clip_ratio/high_max": 0.0026995801090379246, + "clip_ratio/high_mean": 0.001102203874324914, + "clip_ratio/low_mean": 0.000710038711076777, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018122425899491645, + "epoch": 12.746355685131196, + "grad_norm": 0.1747591495513916, + "learning_rate": 1e-06, + "loss": -0.0546, + "step": 1232 + }, + { + "clip_ratio/high_max": 0.0021721825978602283, + "clip_ratio/high_mean": 0.0008254947861132678, + "clip_ratio/low_mean": 0.001039720304106595, + "clip_ratio/low_min": 6.993312854319811e-05, + "clip_ratio/region_mean": 0.0018652151193236932, + "epoch": 12.755685131195335, + "grad_norm": 0.14564496278762817, + "learning_rate": 1e-06, + "loss": 0.0087, + "step": 1233 + }, + { + "clip_ratio/high_max": 0.002144414611393586, + "clip_ratio/high_mean": 0.0008651878433738602, + "clip_ratio/low_mean": 0.0008761578019402805, + "clip_ratio/low_min": 3.951798134949058e-05, + "clip_ratio/region_mean": 0.0017413456698704977, + "epoch": 12.765014577259475, + "grad_norm": 0.15314052999019623, + "learning_rate": 1e-06, + "loss": -0.04, + "step": 1234 + }, + { + "clip_ratio/high_max": 0.0026401054456073325, + "clip_ratio/high_mean": 0.0009981954117392888, + "clip_ratio/low_mean": 0.0006213458800630178, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016195412681554444, + "epoch": 12.774344023323614, + "grad_norm": 0.14276200532913208, + "learning_rate": 1e-06, + "loss": -0.0479, + "step": 1235 + }, + { + "clip_ratio/high_max": 0.0022905794394318946, + "clip_ratio/high_mean": 0.001022375319735147, + "clip_ratio/low_mean": 0.0008215699112952279, + "clip_ratio/low_min": 4.134868140681647e-05, + "clip_ratio/region_mean": 0.001843945232394617, + "epoch": 12.783673469387756, + "grad_norm": 0.292371541261673, + "learning_rate": 1e-06, + "loss": -0.0469, + "step": 1236 + }, + { + "clip_ratio/high_max": 0.002155434776796028, + "clip_ratio/high_mean": 0.0009097324527829187, + "clip_ratio/low_mean": 0.000905888849956682, + "clip_ratio/low_min": 5.2954881539335474e-05, + "clip_ratio/region_mean": 0.0018156213554902934, + "epoch": 12.793002915451895, + "grad_norm": 0.1603776216506958, + "learning_rate": 1e-06, + "loss": -0.0341, + "step": 1237 + }, + { + "clip_ratio/high_max": 0.002130109212885145, + "clip_ratio/high_mean": 0.0009396676505275536, + "clip_ratio/low_mean": 0.000692275805704412, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001631943461688934, + "epoch": 12.802332361516035, + "grad_norm": 0.1660233736038208, + "learning_rate": 1e-06, + "loss": -0.0454, + "step": 1238 + }, + { + "clip_ratio/high_max": 0.002717175379075343, + "clip_ratio/high_mean": 0.0011015345671694377, + "clip_ratio/low_mean": 0.0007200122481663129, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018215468116977718, + "epoch": 12.811661807580174, + "grad_norm": 0.16909345984458923, + "learning_rate": 1e-06, + "loss": -0.0476, + "step": 1239 + }, + { + "clip_ratio/high_max": 0.0025858399385469966, + "clip_ratio/high_mean": 0.0010903625588980503, + "clip_ratio/low_mean": 0.0007437107390160236, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001834073267673375, + "epoch": 12.820991253644316, + "grad_norm": 0.14618924260139465, + "learning_rate": 1e-06, + "loss": -0.0283, + "step": 1240 + }, + { + "clip_ratio/high_max": 0.002248534372483846, + "clip_ratio/high_mean": 0.0008539697155356407, + "clip_ratio/low_mean": 0.0009583699647919275, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018123396876035258, + "epoch": 12.830320699708455, + "grad_norm": 0.12900881469249725, + "learning_rate": 1e-06, + "loss": -0.0065, + "step": 1241 + }, + { + "clip_ratio/high_max": 0.002524964431358967, + "clip_ratio/high_mean": 0.0009423884275747696, + "clip_ratio/low_mean": 0.0007425225703627802, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016849110324983485, + "epoch": 12.839650145772595, + "grad_norm": 0.304208368062973, + "learning_rate": 1e-06, + "loss": -0.023, + "step": 1242 + }, + { + "clip_ratio/high_max": 0.0023080027385731228, + "clip_ratio/high_mean": 0.0010065719252452254, + "clip_ratio/low_mean": 0.0009284118095820304, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019349837166373618, + "epoch": 12.848979591836734, + "grad_norm": 0.16248719394207, + "learning_rate": 1e-06, + "loss": -0.0317, + "step": 1243 + }, + { + "clip_ratio/high_max": 0.0028574519892572425, + "clip_ratio/high_mean": 0.001081569425878115, + "clip_ratio/low_mean": 0.0010608002203298383, + "clip_ratio/low_min": 7.573154289275408e-05, + "clip_ratio/region_mean": 0.0021423696525744162, + "epoch": 12.858309037900874, + "grad_norm": 0.1720161885023117, + "learning_rate": 1e-06, + "loss": 0.0147, + "step": 1244 + }, + { + "clip_ratio/high_max": 0.0026311200199415907, + "clip_ratio/high_mean": 0.0010182366913795704, + "clip_ratio/low_mean": 0.0008118485729937674, + "clip_ratio/low_min": 7.324685884668725e-05, + "clip_ratio/region_mean": 0.001830085224355571, + "epoch": 12.867638483965015, + "grad_norm": 0.1342935860157013, + "learning_rate": 1e-06, + "loss": -0.0344, + "step": 1245 + }, + { + "clip_ratio/high_max": 0.0031472905902774073, + "clip_ratio/high_mean": 0.00119532509779674, + "clip_ratio/low_mean": 0.0007574910468974849, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019528161574271508, + "epoch": 12.876967930029155, + "grad_norm": 0.1567607969045639, + "learning_rate": 1e-06, + "loss": -0.0768, + "step": 1246 + }, + { + "clip_ratio/high_max": 0.002146186903701164, + "clip_ratio/high_mean": 0.0008842511306283996, + "clip_ratio/low_mean": 0.0008600137771281879, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017442649295844603, + "epoch": 12.886297376093294, + "grad_norm": 0.1506277322769165, + "learning_rate": 1e-06, + "loss": -0.0228, + "step": 1247 + }, + { + "clip_ratio/high_max": 0.0028605922707356513, + "clip_ratio/high_mean": 0.0011452191774878884, + "clip_ratio/low_mean": 0.0008477435312670423, + "clip_ratio/low_min": 7.152830585255288e-05, + "clip_ratio/region_mean": 0.0019929627160308883, + "epoch": 12.895626822157434, + "grad_norm": 0.167267844080925, + "learning_rate": 1e-06, + "loss": -0.0492, + "step": 1248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0907156808035714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4070.0, + "completions/mean_length": 855.1385498046875, + "completions/mean_terminated_length": 531.8106689453125, + "completions/min_length": 102.0, + "completions/min_terminated_length": 102.0, + "epoch": 13.00932944606414, + "grad_norm": 0.14323611557483673, + "learning_rate": 1e-06, + "loss": -0.0155, + "num_tokens": 713649407.0, + "reward": 0.6462751626968384, + "reward_std": 0.1527000069618225, + "rewards/simpleverify_reward/mean": 0.6462751030921936, + "rewards/simpleverify_reward/std": 0.4781334400177002, + "step": 1249 + }, + { + "clip_ratio/high_max": 0.0018533071197452955, + "clip_ratio/high_mean": 0.0006961805702303536, + "clip_ratio/low_mean": 0.00040510214319056104, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001101282690797234, + "epoch": 13.018658892128279, + "grad_norm": 0.31508520245552063, + "learning_rate": 1e-06, + "loss": -0.0717, + "step": 1250 + }, + { + "clip_ratio/high_max": 0.0017398837335349526, + "clip_ratio/high_mean": 0.0006584467691936879, + "clip_ratio/low_mean": 0.0004901854458694288, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011486322073324118, + "epoch": 13.02798833819242, + "grad_norm": 0.1800042986869812, + "learning_rate": 1e-06, + "loss": -0.0162, + "step": 1251 + }, + { + "clip_ratio/high_max": 0.0020725686081277672, + "clip_ratio/high_mean": 0.0007888763993832981, + "clip_ratio/low_mean": 0.0005945292377873557, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001383405615342781, + "epoch": 13.03731778425656, + "grad_norm": 0.2943728566169739, + "learning_rate": 1e-06, + "loss": -0.0257, + "step": 1252 + }, + { + "clip_ratio/high_max": 0.0020031657149957027, + "clip_ratio/high_mean": 0.0007610687353007961, + "clip_ratio/low_mean": 0.0005245145239314297, + "clip_ratio/low_min": 1.3394770576269366e-05, + "clip_ratio/region_mean": 0.0012855832756031305, + "epoch": 13.0466472303207, + "grad_norm": 0.16200225055217743, + "learning_rate": 1e-06, + "loss": -0.0555, + "step": 1253 + }, + { + "clip_ratio/high_max": 0.0020169132403680123, + "clip_ratio/high_mean": 0.0007873434478824493, + "clip_ratio/low_mean": 0.0004928506391479459, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012801940538338386, + "epoch": 13.055976676384839, + "grad_norm": 0.16475985944271088, + "learning_rate": 1e-06, + "loss": -0.0493, + "step": 1254 + }, + { + "clip_ratio/high_max": 0.0020985237606510054, + "clip_ratio/high_mean": 0.0008507140464644181, + "clip_ratio/low_mean": 0.0005364021710647648, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013871162445866503, + "epoch": 13.06530612244898, + "grad_norm": 0.2778988480567932, + "learning_rate": 1e-06, + "loss": -0.0684, + "step": 1255 + }, + { + "clip_ratio/high_max": 0.0017238276814168785, + "clip_ratio/high_mean": 0.0007483742410840932, + "clip_ratio/low_mean": 0.0005778618916565392, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013262361899251118, + "epoch": 13.07463556851312, + "grad_norm": 0.16235233843326569, + "learning_rate": 1e-06, + "loss": -0.0543, + "step": 1256 + }, + { + "clip_ratio/high_max": 0.002413678630546201, + "clip_ratio/high_mean": 0.0009781181597645627, + "clip_ratio/low_mean": 0.0005532661152756191, + "clip_ratio/low_min": 2.146291262761224e-05, + "clip_ratio/region_mean": 0.0015313842850446235, + "epoch": 13.08396501457726, + "grad_norm": 0.15409797430038452, + "learning_rate": 1e-06, + "loss": -0.0921, + "step": 1257 + }, + { + "clip_ratio/high_max": 0.0023069537346600555, + "clip_ratio/high_mean": 0.0008356540129170753, + "clip_ratio/low_mean": 0.0005737879018852254, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014094419238972478, + "epoch": 13.093294460641399, + "grad_norm": 0.18945381045341492, + "learning_rate": 1e-06, + "loss": -0.0736, + "step": 1258 + }, + { + "clip_ratio/high_max": 0.0022070999548304826, + "clip_ratio/high_mean": 0.0007774781824991805, + "clip_ratio/low_mean": 0.0006468332212534733, + "clip_ratio/low_min": 1.7970098269870505e-05, + "clip_ratio/region_mean": 0.001424311372829834, + "epoch": 13.102623906705539, + "grad_norm": 0.16146373748779297, + "learning_rate": 1e-06, + "loss": -0.0265, + "step": 1259 + }, + { + "clip_ratio/high_max": 0.002119520322594326, + "clip_ratio/high_mean": 0.0008539093469153158, + "clip_ratio/low_mean": 0.000707914303347934, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015618236211594194, + "epoch": 13.11195335276968, + "grad_norm": 0.16552145779132843, + "learning_rate": 1e-06, + "loss": -0.0341, + "step": 1260 + }, + { + "clip_ratio/high_max": 0.0025728354521561414, + "clip_ratio/high_mean": 0.0009410246748302598, + "clip_ratio/low_mean": 0.0005817306664539501, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001522755337646231, + "epoch": 13.12128279883382, + "grad_norm": 0.18818584084510803, + "learning_rate": 1e-06, + "loss": -0.0624, + "step": 1261 + }, + { + "clip_ratio/high_max": 0.002249132656288566, + "clip_ratio/high_mean": 0.0008876810570654925, + "clip_ratio/low_mean": 0.0007052272694636486, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015929083529044874, + "epoch": 13.130612244897959, + "grad_norm": 0.14595019817352295, + "learning_rate": 1e-06, + "loss": -0.0468, + "step": 1262 + }, + { + "clip_ratio/high_max": 0.0024061270050879102, + "clip_ratio/high_mean": 0.0009578252847859403, + "clip_ratio/low_mean": 0.0007233730557345552, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016811983077786863, + "epoch": 13.139941690962099, + "grad_norm": 0.15412220358848572, + "learning_rate": 1e-06, + "loss": -0.047, + "step": 1263 + }, + { + "clip_ratio/high_max": 0.0023314416321227327, + "clip_ratio/high_mean": 0.0008429664931099978, + "clip_ratio/low_mean": 0.000708587554981932, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001551553992612753, + "epoch": 13.14927113702624, + "grad_norm": 0.14815962314605713, + "learning_rate": 1e-06, + "loss": -0.0617, + "step": 1264 + }, + { + "clip_ratio/high_max": 0.0024939264767454006, + "clip_ratio/high_mean": 0.0009565121981722768, + "clip_ratio/low_mean": 0.0008484964637318626, + "clip_ratio/low_min": 1.1405109034967609e-05, + "clip_ratio/region_mean": 0.0018050086800940335, + "epoch": 13.15860058309038, + "grad_norm": 0.16304215788841248, + "learning_rate": 1e-06, + "loss": -0.0139, + "step": 1265 + }, + { + "clip_ratio/high_max": 0.0022899250325281173, + "clip_ratio/high_mean": 0.0009441661386517808, + "clip_ratio/low_mean": 0.0008178508192031586, + "clip_ratio/low_min": 5.0785482017090544e-05, + "clip_ratio/region_mean": 0.0017620169310248457, + "epoch": 13.167930029154519, + "grad_norm": 0.15857693552970886, + "learning_rate": 1e-06, + "loss": -0.0279, + "step": 1266 + }, + { + "clip_ratio/high_max": 0.0022827775246696547, + "clip_ratio/high_mean": 0.0009474246144236531, + "clip_ratio/low_mean": 0.0008259005080617499, + "clip_ratio/low_min": 4.331254240241833e-05, + "clip_ratio/region_mean": 0.0017733250788296573, + "epoch": 13.177259475218658, + "grad_norm": 0.16857674717903137, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 1267 + }, + { + "clip_ratio/high_max": 0.0020531503105303273, + "clip_ratio/high_mean": 0.0008404118561884388, + "clip_ratio/low_mean": 0.0008659362006255833, + "clip_ratio/low_min": 3.568370084394701e-05, + "clip_ratio/region_mean": 0.0017063480445358437, + "epoch": 13.186588921282798, + "grad_norm": 1.5576626062393188, + "learning_rate": 1e-06, + "loss": -0.025, + "step": 1268 + }, + { + "clip_ratio/high_max": 0.002561094363045413, + "clip_ratio/high_mean": 0.0009574164905643556, + "clip_ratio/low_mean": 0.0007971743943926413, + "clip_ratio/low_min": 1.7212889360962436e-05, + "clip_ratio/region_mean": 0.0017545908558531664, + "epoch": 13.19591836734694, + "grad_norm": 0.1774628758430481, + "learning_rate": 1e-06, + "loss": -0.0443, + "step": 1269 + }, + { + "clip_ratio/high_max": 0.0029007500161242206, + "clip_ratio/high_mean": 0.001128241274273023, + "clip_ratio/low_mean": 0.000921056325751124, + "clip_ratio/low_min": 4.4034757593180984e-05, + "clip_ratio/region_mean": 0.0020492976036621258, + "epoch": 13.205247813411079, + "grad_norm": 0.18118013441562653, + "learning_rate": 1e-06, + "loss": -0.0644, + "step": 1270 + }, + { + "clip_ratio/high_max": 0.0022209560265764594, + "clip_ratio/high_mean": 0.0009575722651788965, + "clip_ratio/low_mean": 0.0008682124571350869, + "clip_ratio/low_min": 3.587143510230817e-05, + "clip_ratio/region_mean": 0.0018257847041240893, + "epoch": 13.214577259475218, + "grad_norm": 0.1728212535381317, + "learning_rate": 1e-06, + "loss": -0.0295, + "step": 1271 + }, + { + "clip_ratio/high_max": 0.0021065881410322618, + "clip_ratio/high_mean": 0.0008626566923339851, + "clip_ratio/low_mean": 0.0010264630072924774, + "clip_ratio/low_min": 9.42913302424131e-05, + "clip_ratio/region_mean": 0.001889119710540399, + "epoch": 13.223906705539358, + "grad_norm": 0.24277324974536896, + "learning_rate": 1e-06, + "loss": -0.0231, + "step": 1272 + }, + { + "clip_ratio/high_max": 0.002869507086870726, + "clip_ratio/high_mean": 0.0011621733756328467, + "clip_ratio/low_mean": 0.0009256251032638829, + "clip_ratio/low_min": 3.807492976193316e-05, + "clip_ratio/region_mean": 0.002087798471620772, + "epoch": 13.2332361516035, + "grad_norm": 0.17172911763191223, + "learning_rate": 1e-06, + "loss": -0.0539, + "step": 1273 + }, + { + "clip_ratio/high_max": 0.002434178502880968, + "clip_ratio/high_mean": 0.0010049845168396132, + "clip_ratio/low_mean": 0.0008125728973027435, + "clip_ratio/low_min": 5.372974919737317e-05, + "clip_ratio/region_mean": 0.0018175573932239786, + "epoch": 13.242565597667639, + "grad_norm": 0.13892962038516998, + "learning_rate": 1e-06, + "loss": -0.0129, + "step": 1274 + }, + { + "clip_ratio/high_max": 0.0024921852455008775, + "clip_ratio/high_mean": 0.0009185114486172097, + "clip_ratio/low_mean": 0.0009172616119030863, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018357730732532218, + "epoch": 13.251895043731778, + "grad_norm": 0.1514800786972046, + "learning_rate": 1e-06, + "loss": -0.0036, + "step": 1275 + }, + { + "clip_ratio/high_max": 0.0025304442169726826, + "clip_ratio/high_mean": 0.0010810512976604514, + "clip_ratio/low_mean": 0.0010384250945207896, + "clip_ratio/low_min": 5.361258990888018e-05, + "clip_ratio/region_mean": 0.002119476383086294, + "epoch": 13.261224489795918, + "grad_norm": 0.14814990758895874, + "learning_rate": 1e-06, + "loss": -0.0236, + "step": 1276 + }, + { + "clip_ratio/high_max": 0.002682504476979375, + "clip_ratio/high_mean": 0.0010731675320130307, + "clip_ratio/low_mean": 0.001041904573867214, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021150721222511493, + "epoch": 13.270553935860057, + "grad_norm": 0.16266551613807678, + "learning_rate": 1e-06, + "loss": -0.0313, + "step": 1277 + }, + { + "clip_ratio/high_max": 0.002794327348965453, + "clip_ratio/high_mean": 0.0010381264983152505, + "clip_ratio/low_mean": 0.000878216490491468, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019163429315085523, + "epoch": 13.279883381924199, + "grad_norm": 0.13999363780021667, + "learning_rate": 1e-06, + "loss": -0.0444, + "step": 1278 + }, + { + "clip_ratio/high_max": 0.002684035564016085, + "clip_ratio/high_mean": 0.0011079498035542201, + "clip_ratio/low_mean": 0.0008522837451891974, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019602335887611844, + "epoch": 13.289212827988338, + "grad_norm": 0.22559887170791626, + "learning_rate": 1e-06, + "loss": -0.0134, + "step": 1279 + }, + { + "clip_ratio/high_max": 0.003137089923257008, + "clip_ratio/high_mean": 0.0012736490425595548, + "clip_ratio/low_mean": 0.0008657584639877314, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002139407537470106, + "epoch": 13.298542274052478, + "grad_norm": 0.15872757136821747, + "learning_rate": 1e-06, + "loss": -0.0499, + "step": 1280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0867745535714286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 3974.0, + "completions/mean_length": 833.7069702148438, + "completions/mean_terminated_length": 523.7243041992188, + "completions/min_length": 5.0, + "completions/min_terminated_length": 5.0, + "epoch": 13.307871720116617, + "grad_norm": 0.1634041666984558, + "learning_rate": 1e-06, + "loss": -0.0482, + "num_tokens": 730220885.0, + "reward": 0.6594587564468384, + "reward_std": 0.14032572507858276, + "rewards/simpleverify_reward/mean": 0.6594586968421936, + "rewards/simpleverify_reward/std": 0.4738995432853699, + "step": 1281 + }, + { + "clip_ratio/high_max": 0.001808859349694103, + "clip_ratio/high_mean": 0.0006272067239478929, + "clip_ratio/low_mean": 0.00036182535313855624, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.000989032101642806, + "epoch": 13.317201166180759, + "grad_norm": 0.1884487271308899, + "learning_rate": 1e-06, + "loss": 0.0078, + "step": 1282 + }, + { + "clip_ratio/high_max": 0.0020336356028565206, + "clip_ratio/high_mean": 0.0007408132141790702, + "clip_ratio/low_mean": 0.0002650878468557494, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010059010492113885, + "epoch": 13.326530612244898, + "grad_norm": 0.15217967331409454, + "learning_rate": 1e-06, + "loss": -0.0742, + "step": 1283 + }, + { + "clip_ratio/high_max": 0.00186717539691017, + "clip_ratio/high_mean": 0.000662296330119716, + "clip_ratio/low_mean": 0.00041883934864017647, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001081135676940903, + "epoch": 13.335860058309038, + "grad_norm": 0.18114089965820312, + "learning_rate": 1e-06, + "loss": -0.0127, + "step": 1284 + }, + { + "clip_ratio/high_max": 0.002345685505133588, + "clip_ratio/high_mean": 0.0008602707530371845, + "clip_ratio/low_mean": 0.00040927010786617757, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001269540865905583, + "epoch": 13.345189504373177, + "grad_norm": 0.15168233215808868, + "learning_rate": 1e-06, + "loss": -0.0395, + "step": 1285 + }, + { + "clip_ratio/high_max": 0.001897586254926864, + "clip_ratio/high_mean": 0.0006921796993992757, + "clip_ratio/low_mean": 0.00040923389678937383, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011014136034646071, + "epoch": 13.354518950437317, + "grad_norm": 0.15989919006824493, + "learning_rate": 1e-06, + "loss": -0.0615, + "step": 1286 + }, + { + "clip_ratio/high_max": 0.001944827425177209, + "clip_ratio/high_mean": 0.0007232873549583019, + "clip_ratio/low_mean": 0.00046811776928734616, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011914051265193848, + "epoch": 13.363848396501458, + "grad_norm": 0.20336195826530457, + "learning_rate": 1e-06, + "loss": -0.0298, + "step": 1287 + }, + { + "clip_ratio/high_max": 0.0018089843360939994, + "clip_ratio/high_mean": 0.0007766396356601035, + "clip_ratio/low_mean": 0.0004975311251200765, + "clip_ratio/low_min": 1.8723787434282713e-05, + "clip_ratio/region_mean": 0.001274170761462301, + "epoch": 13.373177842565598, + "grad_norm": 1.639616847038269, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 1288 + }, + { + "clip_ratio/high_max": 0.0018768889276543632, + "clip_ratio/high_mean": 0.0007424584982800297, + "clip_ratio/low_mean": 0.0005555891229960253, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012980476531083696, + "epoch": 13.382507288629737, + "grad_norm": 0.16705983877182007, + "learning_rate": 1e-06, + "loss": -0.0339, + "step": 1289 + }, + { + "clip_ratio/high_max": 0.0019959039309469517, + "clip_ratio/high_mean": 0.0007260613092512358, + "clip_ratio/low_mean": 0.0005967703255009837, + "clip_ratio/low_min": 1.3264007066027261e-05, + "clip_ratio/region_mean": 0.0013228316202003043, + "epoch": 13.391836734693877, + "grad_norm": 0.17171286046504974, + "learning_rate": 1e-06, + "loss": -0.0324, + "step": 1290 + }, + { + "clip_ratio/high_max": 0.0017491098660684656, + "clip_ratio/high_mean": 0.0007554136082035257, + "clip_ratio/low_mean": 0.0005566121276388003, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001312025746301515, + "epoch": 13.401166180758018, + "grad_norm": 0.154076486825943, + "learning_rate": 1e-06, + "loss": -0.0566, + "step": 1291 + }, + { + "clip_ratio/high_max": 0.002457838891132269, + "clip_ratio/high_mean": 0.0009031400022649905, + "clip_ratio/low_mean": 0.0008218799384849262, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001725019967125263, + "epoch": 13.410495626822158, + "grad_norm": 0.3098447620868683, + "learning_rate": 1e-06, + "loss": -0.0338, + "step": 1292 + }, + { + "clip_ratio/high_max": 0.002612185176985804, + "clip_ratio/high_mean": 0.0009527155234536622, + "clip_ratio/low_mean": 0.0007108504405550775, + "clip_ratio/low_min": 1.6512549336766824e-05, + "clip_ratio/region_mean": 0.0016635659776511602, + "epoch": 13.419825072886297, + "grad_norm": 0.15453490614891052, + "learning_rate": 1e-06, + "loss": -0.0544, + "step": 1293 + }, + { + "clip_ratio/high_max": 0.002358254940190818, + "clip_ratio/high_mean": 0.0008521508279955015, + "clip_ratio/low_mean": 0.0004965492491919576, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013487000614986755, + "epoch": 13.429154518950437, + "grad_norm": 0.1930791139602661, + "learning_rate": 1e-06, + "loss": -0.02, + "step": 1294 + }, + { + "clip_ratio/high_max": 0.0019142381934216246, + "clip_ratio/high_mean": 0.00085461691924138, + "clip_ratio/low_mean": 0.0005969883497982664, + "clip_ratio/low_min": 1.1363636076566763e-05, + "clip_ratio/region_mean": 0.0014516052324324846, + "epoch": 13.438483965014576, + "grad_norm": 0.16649490594863892, + "learning_rate": 1e-06, + "loss": -0.0216, + "step": 1295 + }, + { + "clip_ratio/high_max": 0.0023658930076635443, + "clip_ratio/high_mean": 0.0010460661578690633, + "clip_ratio/low_mean": 0.0006774481298634782, + "clip_ratio/low_min": 1.3202366062614601e-05, + "clip_ratio/region_mean": 0.0017235142659046687, + "epoch": 13.447813411078718, + "grad_norm": 0.20801228284835815, + "learning_rate": 1e-06, + "loss": -0.0699, + "step": 1296 + }, + { + "clip_ratio/high_max": 0.0022757874612580054, + "clip_ratio/high_mean": 0.0009783147088455735, + "clip_ratio/low_mean": 0.0006769907358830096, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016553054811083712, + "epoch": 13.457142857142857, + "grad_norm": 0.1578143984079361, + "learning_rate": 1e-06, + "loss": -0.0621, + "step": 1297 + }, + { + "clip_ratio/high_max": 0.002449881583743263, + "clip_ratio/high_mean": 0.0009233583696186543, + "clip_ratio/low_mean": 0.0007409365225612419, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001664294886722928, + "epoch": 13.466472303206997, + "grad_norm": 0.14478129148483276, + "learning_rate": 1e-06, + "loss": -0.0374, + "step": 1298 + }, + { + "clip_ratio/high_max": 0.0024123422044795007, + "clip_ratio/high_mean": 0.0010598309600027278, + "clip_ratio/low_mean": 0.000828198169983807, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018880291463574395, + "epoch": 13.475801749271136, + "grad_norm": 0.15566128492355347, + "learning_rate": 1e-06, + "loss": -0.0644, + "step": 1299 + }, + { + "clip_ratio/high_max": 0.0022581193697988056, + "clip_ratio/high_mean": 0.0008295270599774085, + "clip_ratio/low_mean": 0.0007799461964168586, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001609473256394267, + "epoch": 13.485131195335278, + "grad_norm": 0.1817512959241867, + "learning_rate": 1e-06, + "loss": -0.0132, + "step": 1300 + }, + { + "clip_ratio/high_max": 0.0022852260008221492, + "clip_ratio/high_mean": 0.0008699650170456152, + "clip_ratio/low_mean": 0.0007498297668462328, + "clip_ratio/low_min": 1.2918560969410464e-05, + "clip_ratio/region_mean": 0.001619794773432659, + "epoch": 13.494460641399417, + "grad_norm": 0.12788450717926025, + "learning_rate": 1e-06, + "loss": -0.0413, + "step": 1301 + }, + { + "clip_ratio/high_max": 0.002921771061664913, + "clip_ratio/high_mean": 0.0011562288818822708, + "clip_ratio/low_mean": 0.0007411051501549082, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018973340338561684, + "epoch": 13.503790087463557, + "grad_norm": 0.14873087406158447, + "learning_rate": 1e-06, + "loss": -0.0697, + "step": 1302 + }, + { + "clip_ratio/high_max": 0.0025456506955379155, + "clip_ratio/high_mean": 0.0009445292271266226, + "clip_ratio/low_mean": 0.0009287504954045289, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018732797107077204, + "epoch": 13.513119533527696, + "grad_norm": 0.15014441311359406, + "learning_rate": 1e-06, + "loss": -0.0002, + "step": 1303 + }, + { + "clip_ratio/high_max": 0.0029857042609364726, + "clip_ratio/high_mean": 0.001153793593402952, + "clip_ratio/low_mean": 0.0007656593706997228, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019194529886590317, + "epoch": 13.522448979591836, + "grad_norm": 0.16597695648670197, + "learning_rate": 1e-06, + "loss": -0.021, + "step": 1304 + }, + { + "clip_ratio/high_max": 0.0023489712039008737, + "clip_ratio/high_mean": 0.0009593406102794688, + "clip_ratio/low_mean": 0.0010226008926110808, + "clip_ratio/low_min": 4.99433335789945e-05, + "clip_ratio/region_mean": 0.00198194153199438, + "epoch": 13.531778425655977, + "grad_norm": 0.1690942943096161, + "learning_rate": 1e-06, + "loss": 0.0098, + "step": 1305 + }, + { + "clip_ratio/high_max": 0.0021892231088713743, + "clip_ratio/high_mean": 0.0008942062304413412, + "clip_ratio/low_mean": 0.0008410560567426728, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017352623035549186, + "epoch": 13.541107871720117, + "grad_norm": 0.15915672481060028, + "learning_rate": 1e-06, + "loss": 0.0015, + "step": 1306 + }, + { + "clip_ratio/high_max": 0.002514030653401278, + "clip_ratio/high_mean": 0.0008825074801279698, + "clip_ratio/low_mean": 0.0007410275356960483, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016235350412898697, + "epoch": 13.550437317784256, + "grad_norm": 0.1376136839389801, + "learning_rate": 1e-06, + "loss": -0.0317, + "step": 1307 + }, + { + "clip_ratio/high_max": 0.0026857103148358874, + "clip_ratio/high_mean": 0.0009508218008704716, + "clip_ratio/low_mean": 0.0008803735017863801, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018311953099328093, + "epoch": 13.559766763848396, + "grad_norm": 5.408728122711182, + "learning_rate": 1e-06, + "loss": 0.0036, + "step": 1308 + }, + { + "clip_ratio/high_max": 0.0023494828928960487, + "clip_ratio/high_mean": 0.0009465143348279526, + "clip_ratio/low_mean": 0.0008140747913785162, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001760589104378596, + "epoch": 13.569096209912537, + "grad_norm": 0.1753227561712265, + "learning_rate": 1e-06, + "loss": -0.0453, + "step": 1309 + }, + { + "clip_ratio/high_max": 0.002443296463752631, + "clip_ratio/high_mean": 0.0010179600867559202, + "clip_ratio/low_mean": 0.0009900964123517042, + "clip_ratio/low_min": 1.70068033185089e-05, + "clip_ratio/region_mean": 0.0020080564900126774, + "epoch": 13.578425655976677, + "grad_norm": 0.2076674997806549, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 1310 + }, + { + "clip_ratio/high_max": 0.002646667773660738, + "clip_ratio/high_mean": 0.0010991272793035023, + "clip_ratio/low_mean": 0.0007010152285147342, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001800142530555604, + "epoch": 13.587755102040816, + "grad_norm": 0.14079692959785461, + "learning_rate": 1e-06, + "loss": -0.0639, + "step": 1311 + }, + { + "clip_ratio/high_max": 0.002614323457237333, + "clip_ratio/high_mean": 0.0010439149227750022, + "clip_ratio/low_mean": 0.0008334641343026306, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018773790143313818, + "epoch": 13.597084548104956, + "grad_norm": 0.15647540986537933, + "learning_rate": 1e-06, + "loss": -0.0666, + "step": 1312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09716796875, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4091.0, + "completions/mean_length": 881.06884765625, + "completions/mean_terminated_length": 535.0593872070312, + "completions/min_length": 108.0, + "completions/min_terminated_length": 108.0, + "epoch": 13.606413994169095, + "grad_norm": 0.16756437718868256, + "learning_rate": 1e-06, + "loss": -0.052, + "num_tokens": 746936521.0, + "reward": 0.655831515789032, + "reward_std": 0.14602485299110413, + "rewards/simpleverify_reward/mean": 0.6558314561843872, + "rewards/simpleverify_reward/std": 0.4751046895980835, + "step": 1313 + }, + { + "clip_ratio/high_max": 0.001813500355638098, + "clip_ratio/high_mean": 0.0007773894612910226, + "clip_ratio/low_mean": 0.0003756572200472874, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011530467236298136, + "epoch": 13.615743440233237, + "grad_norm": 0.16500411927700043, + "learning_rate": 1e-06, + "loss": -0.0605, + "step": 1314 + }, + { + "clip_ratio/high_max": 0.0015950457909639226, + "clip_ratio/high_mean": 0.0005659756370732794, + "clip_ratio/low_mean": 0.00043099519257339125, + "clip_ratio/low_min": 1.5644554878235795e-05, + "clip_ratio/region_mean": 0.000996970833512023, + "epoch": 13.625072886297376, + "grad_norm": 0.17962244153022766, + "learning_rate": 1e-06, + "loss": 0.0066, + "step": 1315 + }, + { + "clip_ratio/high_max": 0.002201358074671589, + "clip_ratio/high_mean": 0.0008172746202035341, + "clip_ratio/low_mean": 0.0003837636122625554, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012010382415610366, + "epoch": 13.634402332361516, + "grad_norm": 0.14784713089466095, + "learning_rate": 1e-06, + "loss": -0.0393, + "step": 1316 + }, + { + "clip_ratio/high_max": 0.0017767321260180324, + "clip_ratio/high_mean": 0.0006811254061176442, + "clip_ratio/low_mean": 0.0004224134347623476, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011035388452000916, + "epoch": 13.643731778425655, + "grad_norm": 0.14827978610992432, + "learning_rate": 1e-06, + "loss": -0.025, + "step": 1317 + }, + { + "clip_ratio/high_max": 0.0019569822106859647, + "clip_ratio/high_mean": 0.0006786110207031015, + "clip_ratio/low_mean": 0.0004180717965027725, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010966828122036532, + "epoch": 13.653061224489797, + "grad_norm": 0.1614934653043747, + "learning_rate": 1e-06, + "loss": -0.0406, + "step": 1318 + }, + { + "clip_ratio/high_max": 0.0021182683885854203, + "clip_ratio/high_mean": 0.0008049517764447955, + "clip_ratio/low_mean": 0.00043997185821353924, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012449236237443984, + "epoch": 13.662390670553936, + "grad_norm": 0.18093058466911316, + "learning_rate": 1e-06, + "loss": -0.0635, + "step": 1319 + }, + { + "clip_ratio/high_max": 0.0018480188482499216, + "clip_ratio/high_mean": 0.0007299711505766027, + "clip_ratio/low_mean": 0.0006094280352044734, + "clip_ratio/low_min": 1.2781186342181172e-05, + "clip_ratio/region_mean": 0.0013393991757766344, + "epoch": 13.671720116618076, + "grad_norm": 0.1490190625190735, + "learning_rate": 1e-06, + "loss": -0.0172, + "step": 1320 + }, + { + "clip_ratio/high_max": 0.0015769346318847965, + "clip_ratio/high_mean": 0.0005846622361787013, + "clip_ratio/low_mean": 0.0005528841193154221, + "clip_ratio/low_min": 2.5237230147467926e-05, + "clip_ratio/region_mean": 0.001137546347308671, + "epoch": 13.681049562682215, + "grad_norm": 0.1591527760028839, + "learning_rate": 1e-06, + "loss": 0.0132, + "step": 1321 + }, + { + "clip_ratio/high_max": 0.0022073958898545243, + "clip_ratio/high_mean": 0.000830552562547382, + "clip_ratio/low_mean": 0.0006038271894794889, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014343797702167649, + "epoch": 13.690379008746355, + "grad_norm": 0.16553795337677002, + "learning_rate": 1e-06, + "loss": -0.0402, + "step": 1322 + }, + { + "clip_ratio/high_max": 0.002165704070648644, + "clip_ratio/high_mean": 0.0007843224639145774, + "clip_ratio/low_mean": 0.0005567349894590734, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001341057457466377, + "epoch": 13.699708454810496, + "grad_norm": 0.16234004497528076, + "learning_rate": 1e-06, + "loss": -0.071, + "step": 1323 + }, + { + "clip_ratio/high_max": 0.002194331907958258, + "clip_ratio/high_mean": 0.0009102580761464196, + "clip_ratio/low_mean": 0.0005638780476147076, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001474136151955463, + "epoch": 13.709037900874636, + "grad_norm": 0.1552676409482956, + "learning_rate": 1e-06, + "loss": -0.033, + "step": 1324 + }, + { + "clip_ratio/high_max": 0.0021396054071374238, + "clip_ratio/high_mean": 0.0008660626008349936, + "clip_ratio/low_mean": 0.0005403208729148901, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001406383489666041, + "epoch": 13.718367346938775, + "grad_norm": 0.1293756663799286, + "learning_rate": 1e-06, + "loss": -0.0791, + "step": 1325 + }, + { + "clip_ratio/high_max": 0.0022590512526221573, + "clip_ratio/high_mean": 0.0009599628792784642, + "clip_ratio/low_mean": 0.0005910327890887856, + "clip_ratio/low_min": 1.0257672329316847e-05, + "clip_ratio/region_mean": 0.0015509956574533135, + "epoch": 13.727696793002915, + "grad_norm": 0.183794304728508, + "learning_rate": 1e-06, + "loss": -0.0439, + "step": 1326 + }, + { + "clip_ratio/high_max": 0.002349718088225927, + "clip_ratio/high_mean": 0.000808860871984507, + "clip_ratio/low_mean": 0.0008203762936318526, + "clip_ratio/low_min": 2.071594281005673e-05, + "clip_ratio/region_mean": 0.001629237140150508, + "epoch": 13.737026239067056, + "grad_norm": 0.22887979447841644, + "learning_rate": 1e-06, + "loss": -0.0194, + "step": 1327 + }, + { + "clip_ratio/high_max": 0.0021565420902334154, + "clip_ratio/high_mean": 0.0008322687081090407, + "clip_ratio/low_mean": 0.0005570681873905414, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001389336885040393, + "epoch": 13.746355685131196, + "grad_norm": 0.14333762228488922, + "learning_rate": 1e-06, + "loss": -0.0549, + "step": 1328 + }, + { + "clip_ratio/high_max": 0.002397045012912713, + "clip_ratio/high_mean": 0.0008801318854239071, + "clip_ratio/low_mean": 0.000892540143468068, + "clip_ratio/low_min": 1.3748350284004118e-05, + "clip_ratio/region_mean": 0.0017726720252539963, + "epoch": 13.755685131195335, + "grad_norm": 0.15640684962272644, + "learning_rate": 1e-06, + "loss": 0.0016, + "step": 1329 + }, + { + "clip_ratio/high_max": 0.002037891717918683, + "clip_ratio/high_mean": 0.0008360920219274703, + "clip_ratio/low_mean": 0.0007439319815603085, + "clip_ratio/low_min": 3.0599756428273395e-05, + "clip_ratio/region_mean": 0.0015800239780219272, + "epoch": 13.765014577259475, + "grad_norm": 0.22396045923233032, + "learning_rate": 1e-06, + "loss": -0.0367, + "step": 1330 + }, + { + "clip_ratio/high_max": 0.00227334935698309, + "clip_ratio/high_mean": 0.0008723421869945014, + "clip_ratio/low_mean": 0.0007686912222197861, + "clip_ratio/low_min": 1.7189218851854093e-05, + "clip_ratio/region_mean": 0.0016410334392276127, + "epoch": 13.774344023323614, + "grad_norm": 0.17985431849956512, + "learning_rate": 1e-06, + "loss": 0.0089, + "step": 1331 + }, + { + "clip_ratio/high_max": 0.0027121722741867416, + "clip_ratio/high_mean": 0.001032087289786432, + "clip_ratio/low_mean": 0.0006574166673090076, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016895039443625137, + "epoch": 13.783673469387756, + "grad_norm": 0.1498870998620987, + "learning_rate": 1e-06, + "loss": -0.0675, + "step": 1332 + }, + { + "clip_ratio/high_max": 0.0025090702292800415, + "clip_ratio/high_mean": 0.0009700507780507905, + "clip_ratio/low_mean": 0.0007487067487090826, + "clip_ratio/low_min": 2.805206531775184e-05, + "clip_ratio/region_mean": 0.001718757499475032, + "epoch": 13.793002915451895, + "grad_norm": 0.14936555922031403, + "learning_rate": 1e-06, + "loss": -0.0544, + "step": 1333 + }, + { + "clip_ratio/high_max": 0.0024419114088232163, + "clip_ratio/high_mean": 0.0008547629472559493, + "clip_ratio/low_mean": 0.000663753350636398, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015185162701527588, + "epoch": 13.802332361516035, + "grad_norm": 0.43814796209335327, + "learning_rate": 1e-06, + "loss": -0.0105, + "step": 1334 + }, + { + "clip_ratio/high_max": 0.0023229664475366008, + "clip_ratio/high_mean": 0.000955230130784912, + "clip_ratio/low_mean": 0.0008407178538618609, + "clip_ratio/low_min": 5.260942634777166e-05, + "clip_ratio/region_mean": 0.0017959480537683703, + "epoch": 13.811661807580174, + "grad_norm": 0.16806772351264954, + "learning_rate": 1e-06, + "loss": -0.0677, + "step": 1335 + }, + { + "clip_ratio/high_max": 0.0024982161485240795, + "clip_ratio/high_mean": 0.000988750336546218, + "clip_ratio/low_mean": 0.0006580239614777383, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016467743189423345, + "epoch": 13.820991253644316, + "grad_norm": 0.1731717586517334, + "learning_rate": 1e-06, + "loss": -0.0163, + "step": 1336 + }, + { + "clip_ratio/high_max": 0.002182349213398993, + "clip_ratio/high_mean": 0.0009284431289415807, + "clip_ratio/low_mean": 0.0005774185074187699, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001505861640907824, + "epoch": 13.830320699708455, + "grad_norm": 0.16114822030067444, + "learning_rate": 1e-06, + "loss": -0.0542, + "step": 1337 + }, + { + "clip_ratio/high_max": 0.002774575310468208, + "clip_ratio/high_mean": 0.0009930949527188204, + "clip_ratio/low_mean": 0.0010404394051874988, + "clip_ratio/low_min": 4.161118340562098e-05, + "clip_ratio/region_mean": 0.002033534339716425, + "epoch": 13.839650145772595, + "grad_norm": 0.1805349588394165, + "learning_rate": 1e-06, + "loss": -0.0034, + "step": 1338 + }, + { + "clip_ratio/high_max": 0.0025615940176066943, + "clip_ratio/high_mean": 0.001067853288986953, + "clip_ratio/low_mean": 0.0006013203164911829, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00166917362366803, + "epoch": 13.848979591836734, + "grad_norm": 0.1403028815984726, + "learning_rate": 1e-06, + "loss": -0.0863, + "step": 1339 + }, + { + "clip_ratio/high_max": 0.0025039605316123925, + "clip_ratio/high_mean": 0.0009914779948303476, + "clip_ratio/low_mean": 0.0007381564028037246, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001729634383082157, + "epoch": 13.858309037900874, + "grad_norm": 0.1365215927362442, + "learning_rate": 1e-06, + "loss": -0.0316, + "step": 1340 + }, + { + "clip_ratio/high_max": 0.0021068060996185523, + "clip_ratio/high_mean": 0.0009505476118647493, + "clip_ratio/low_mean": 0.0007266292923304718, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016771769223851152, + "epoch": 13.867638483965015, + "grad_norm": 0.1598242223262787, + "learning_rate": 1e-06, + "loss": -0.0463, + "step": 1341 + }, + { + "clip_ratio/high_max": 0.002830190620443318, + "clip_ratio/high_mean": 0.0011102137050329475, + "clip_ratio/low_mean": 0.0008166018869815161, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019268155701865908, + "epoch": 13.876967930029155, + "grad_norm": 0.16801099479198456, + "learning_rate": 1e-06, + "loss": -0.0532, + "step": 1342 + }, + { + "clip_ratio/high_max": 0.002465410318109207, + "clip_ratio/high_mean": 0.0010882965070777573, + "clip_ratio/low_mean": 0.0008112803934636759, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001899576913274359, + "epoch": 13.886297376093294, + "grad_norm": 0.17822015285491943, + "learning_rate": 1e-06, + "loss": -0.045, + "step": 1343 + }, + { + "clip_ratio/high_max": 0.0022442466652137227, + "clip_ratio/high_mean": 0.0009439130590180866, + "clip_ratio/low_mean": 0.0006400478459909209, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015839609040995128, + "epoch": 13.895626822157434, + "grad_norm": 0.13033932447433472, + "learning_rate": 1e-06, + "loss": -0.0604, + "step": 1344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0910993303571429, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 3996.0, + "completions/mean_length": 852.6739501953125, + "completions/mean_terminated_length": 527.5945434570312, + "completions/min_length": 87.0, + "completions/min_terminated_length": 87.0, + "epoch": 14.00932944606414, + "grad_norm": 0.1300303041934967, + "learning_rate": 1e-06, + "loss": -0.05, + "num_tokens": 763522171.0, + "reward": 0.670235812664032, + "reward_std": 0.1391279101371765, + "rewards/simpleverify_reward/mean": 0.6702357530593872, + "rewards/simpleverify_reward/std": 0.4701355993747711, + "step": 1345 + }, + { + "clip_ratio/high_max": 0.0018163523091061506, + "clip_ratio/high_mean": 0.0007031030163489049, + "clip_ratio/low_mean": 0.0002659307915564568, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009690338156360667, + "epoch": 14.018658892128279, + "grad_norm": 0.14012062549591064, + "learning_rate": 1e-06, + "loss": -0.0438, + "step": 1346 + }, + { + "clip_ratio/high_max": 0.0017441005111322738, + "clip_ratio/high_mean": 0.0005964986430626595, + "clip_ratio/low_mean": 0.0004069341050580988, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001003432771540247, + "epoch": 14.02798833819242, + "grad_norm": 0.17307274043560028, + "learning_rate": 1e-06, + "loss": -0.0041, + "step": 1347 + }, + { + "clip_ratio/high_max": 0.001794335101294564, + "clip_ratio/high_mean": 0.0006799416469220887, + "clip_ratio/low_mean": 0.00041512002508170553, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001095061665182584, + "epoch": 14.03731778425656, + "grad_norm": 0.13101916015148163, + "learning_rate": 1e-06, + "loss": -0.0462, + "step": 1348 + }, + { + "clip_ratio/high_max": 0.0018801286933012307, + "clip_ratio/high_mean": 0.0006800919891247759, + "clip_ratio/low_mean": 0.00046491717193930526, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011450091806182172, + "epoch": 14.0466472303207, + "grad_norm": 0.16502954065799713, + "learning_rate": 1e-06, + "loss": -0.0388, + "step": 1349 + }, + { + "clip_ratio/high_max": 0.001899451766803395, + "clip_ratio/high_mean": 0.0006883819296490401, + "clip_ratio/low_mean": 0.0004995909202989424, + "clip_ratio/low_min": 1.700217580946628e-05, + "clip_ratio/region_mean": 0.0011879728626809083, + "epoch": 14.055976676384839, + "grad_norm": 0.15088720619678497, + "learning_rate": 1e-06, + "loss": 0.0022, + "step": 1350 + }, + { + "clip_ratio/high_max": 0.0017837504274211824, + "clip_ratio/high_mean": 0.0007263548359333072, + "clip_ratio/low_mean": 0.00040178661856771214, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00112814143722062, + "epoch": 14.06530612244898, + "grad_norm": 0.17774386703968048, + "learning_rate": 1e-06, + "loss": -0.0412, + "step": 1351 + }, + { + "clip_ratio/high_max": 0.0021136966097401455, + "clip_ratio/high_mean": 0.0007951533916639164, + "clip_ratio/low_mean": 0.0004988097116438439, + "clip_ratio/low_min": 1.525506468169624e-05, + "clip_ratio/region_mean": 0.0012939631051267497, + "epoch": 14.07463556851312, + "grad_norm": 0.3521977961063385, + "learning_rate": 1e-06, + "loss": -0.0395, + "step": 1352 + }, + { + "clip_ratio/high_max": 0.0018548462649050634, + "clip_ratio/high_mean": 0.0006804034910601331, + "clip_ratio/low_mean": 0.0005834443672938505, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012638478438020684, + "epoch": 14.08396501457726, + "grad_norm": 0.1781509667634964, + "learning_rate": 1e-06, + "loss": -0.0464, + "step": 1353 + }, + { + "clip_ratio/high_max": 0.0018683207890717313, + "clip_ratio/high_mean": 0.0006873378051750478, + "clip_ratio/low_mean": 0.00047870125990812085, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011660390428005485, + "epoch": 14.093294460641399, + "grad_norm": 0.1524149775505066, + "learning_rate": 1e-06, + "loss": -0.0494, + "step": 1354 + }, + { + "clip_ratio/high_max": 0.0018336370158067439, + "clip_ratio/high_mean": 0.0007193510482466081, + "clip_ratio/low_mean": 0.0005639771597998333, + "clip_ratio/low_min": 1.5284911569324322e-05, + "clip_ratio/region_mean": 0.0012833282198698726, + "epoch": 14.102623906705539, + "grad_norm": 0.1644119769334793, + "learning_rate": 1e-06, + "loss": -0.0085, + "step": 1355 + }, + { + "clip_ratio/high_max": 0.001847194303991273, + "clip_ratio/high_mean": 0.0007451445108017651, + "clip_ratio/low_mean": 0.0004804327027159161, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012255772344360594, + "epoch": 14.11195335276968, + "grad_norm": 0.1366289258003235, + "learning_rate": 1e-06, + "loss": -0.0327, + "step": 1356 + }, + { + "clip_ratio/high_max": 0.0023650554940104485, + "clip_ratio/high_mean": 0.0008509068684361409, + "clip_ratio/low_mean": 0.0005658402469634893, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014167471090331674, + "epoch": 14.12128279883382, + "grad_norm": 0.30652573704719543, + "learning_rate": 1e-06, + "loss": -0.0531, + "step": 1357 + }, + { + "clip_ratio/high_max": 0.0019843487862090115, + "clip_ratio/high_mean": 0.0008625020309409592, + "clip_ratio/low_mean": 0.000608655179348716, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001471157174819382, + "epoch": 14.130612244897959, + "grad_norm": 0.15605638921260834, + "learning_rate": 1e-06, + "loss": -0.0816, + "step": 1358 + }, + { + "clip_ratio/high_max": 0.0025308848707936704, + "clip_ratio/high_mean": 0.0009464774957450572, + "clip_ratio/low_mean": 0.0007858975827730319, + "clip_ratio/low_min": 1.7250897144549526e-05, + "clip_ratio/region_mean": 0.0017323750798823312, + "epoch": 14.139941690962099, + "grad_norm": 0.15378928184509277, + "learning_rate": 1e-06, + "loss": -0.0388, + "step": 1359 + }, + { + "clip_ratio/high_max": 0.002428557629173156, + "clip_ratio/high_mean": 0.000973530753981322, + "clip_ratio/low_mean": 0.0007160219192883233, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016895526860025711, + "epoch": 14.14927113702624, + "grad_norm": 0.15807275474071503, + "learning_rate": 1e-06, + "loss": -0.0692, + "step": 1360 + }, + { + "clip_ratio/high_max": 0.002050202587270178, + "clip_ratio/high_mean": 0.0008070806870819069, + "clip_ratio/low_mean": 0.0004673327671298466, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012744134473905433, + "epoch": 14.15860058309038, + "grad_norm": 0.1502304971218109, + "learning_rate": 1e-06, + "loss": -0.0773, + "step": 1361 + }, + { + "clip_ratio/high_max": 0.0023081238941813353, + "clip_ratio/high_mean": 0.0008741438014112646, + "clip_ratio/low_mean": 0.0006113954441389069, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014855392691970337, + "epoch": 14.167930029154519, + "grad_norm": 0.40585288405418396, + "learning_rate": 1e-06, + "loss": -0.0404, + "step": 1362 + }, + { + "clip_ratio/high_max": 0.0023766415106365457, + "clip_ratio/high_mean": 0.0009106923444051063, + "clip_ratio/low_mean": 0.0006825111149737495, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015932035021251068, + "epoch": 14.177259475218658, + "grad_norm": 0.15575088560581207, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 1363 + }, + { + "clip_ratio/high_max": 0.0023336290396400727, + "clip_ratio/high_mean": 0.0009512256656307727, + "clip_ratio/low_mean": 0.00057078834515778, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015220140521705616, + "epoch": 14.186588921282798, + "grad_norm": 0.1504908800125122, + "learning_rate": 1e-06, + "loss": -0.0739, + "step": 1364 + }, + { + "clip_ratio/high_max": 0.0026486451097298414, + "clip_ratio/high_mean": 0.0010069512300105998, + "clip_ratio/low_mean": 0.0005726203116864781, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015795715517015196, + "epoch": 14.19591836734694, + "grad_norm": 0.26770541071891785, + "learning_rate": 1e-06, + "loss": -0.0983, + "step": 1365 + }, + { + "clip_ratio/high_max": 0.002441513533995021, + "clip_ratio/high_mean": 0.0009189674128720071, + "clip_ratio/low_mean": 0.0006258882276597433, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015448556332557928, + "epoch": 14.205247813411079, + "grad_norm": 0.12824857234954834, + "learning_rate": 1e-06, + "loss": -0.0496, + "step": 1366 + }, + { + "clip_ratio/high_max": 0.002254344057291746, + "clip_ratio/high_mean": 0.0007987569788383553, + "clip_ratio/low_mean": 0.0008254318108811276, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016241887715295888, + "epoch": 14.214577259475218, + "grad_norm": 0.19126002490520477, + "learning_rate": 1e-06, + "loss": -0.0071, + "step": 1367 + }, + { + "clip_ratio/high_max": 0.0025836834611254744, + "clip_ratio/high_mean": 0.0010134078765986487, + "clip_ratio/low_mean": 0.0008518906288372818, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001865298552729655, + "epoch": 14.223906705539358, + "grad_norm": 0.14358840882778168, + "learning_rate": 1e-06, + "loss": -0.0553, + "step": 1368 + }, + { + "clip_ratio/high_max": 0.0023644188040634617, + "clip_ratio/high_mean": 0.0009145492313109571, + "clip_ratio/low_mean": 0.0006746485578332795, + "clip_ratio/low_min": 3.703703623614274e-05, + "clip_ratio/region_mean": 0.001589197781868279, + "epoch": 14.2332361516035, + "grad_norm": 0.1665046513080597, + "learning_rate": 1e-06, + "loss": -0.0569, + "step": 1369 + }, + { + "clip_ratio/high_max": 0.002611239324323833, + "clip_ratio/high_mean": 0.0009500922642473597, + "clip_ratio/low_mean": 0.0006333776145766024, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001583469900651835, + "epoch": 14.242565597667639, + "grad_norm": 0.13827988505363464, + "learning_rate": 1e-06, + "loss": -0.0529, + "step": 1370 + }, + { + "clip_ratio/high_max": 0.002228705616289517, + "clip_ratio/high_mean": 0.0008472597346553812, + "clip_ratio/low_mean": 0.0006761608569831878, + "clip_ratio/low_min": 2.270250661240425e-05, + "clip_ratio/region_mean": 0.001523420583907864, + "epoch": 14.251895043731778, + "grad_norm": 0.1589655876159668, + "learning_rate": 1e-06, + "loss": -0.0345, + "step": 1371 + }, + { + "clip_ratio/high_max": 0.0024966281562228687, + "clip_ratio/high_mean": 0.0010061951270472491, + "clip_ratio/low_mean": 0.000924932025554881, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019311271789774764, + "epoch": 14.261224489795918, + "grad_norm": 0.15875262022018433, + "learning_rate": 1e-06, + "loss": -0.0035, + "step": 1372 + }, + { + "clip_ratio/high_max": 0.002191486484662164, + "clip_ratio/high_mean": 0.0008106059503916185, + "clip_ratio/low_mean": 0.0007900780310592381, + "clip_ratio/low_min": 2.001921893679537e-05, + "clip_ratio/region_mean": 0.0016006839941837825, + "epoch": 14.270553935860057, + "grad_norm": 0.21720443665981293, + "learning_rate": 1e-06, + "loss": -0.0067, + "step": 1373 + }, + { + "clip_ratio/high_max": 0.002182364354666788, + "clip_ratio/high_mean": 0.0009078256771317683, + "clip_ratio/low_mean": 0.0007980483455867216, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017058740049833432, + "epoch": 14.279883381924199, + "grad_norm": 0.15851877629756927, + "learning_rate": 1e-06, + "loss": 0.0024, + "step": 1374 + }, + { + "clip_ratio/high_max": 0.0023586457391502336, + "clip_ratio/high_mean": 0.0008919053434510715, + "clip_ratio/low_mean": 0.0006079379568291188, + "clip_ratio/low_min": 3.053248656215146e-05, + "clip_ratio/region_mean": 0.0014998432670836337, + "epoch": 14.289212827988338, + "grad_norm": 0.12755772471427917, + "learning_rate": 1e-06, + "loss": -0.0466, + "step": 1375 + }, + { + "clip_ratio/high_max": 0.0026586229214444757, + "clip_ratio/high_mean": 0.0010218377410637913, + "clip_ratio/low_mean": 0.001001206077489769, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020230438822181895, + "epoch": 14.298542274052478, + "grad_norm": 0.15385538339614868, + "learning_rate": 1e-06, + "loss": -0.039, + "step": 1376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0970633370535714, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4004.0, + "completions/mean_length": 877.1220092773438, + "completions/mean_terminated_length": 531.1010131835938, + "completions/min_length": 118.0, + "completions/min_terminated_length": 118.0, + "epoch": 14.307871720116617, + "grad_norm": 0.1781497597694397, + "learning_rate": 1e-06, + "loss": -0.0439, + "num_tokens": 780128109.0, + "reward": 0.6571568250656128, + "reward_std": 0.14410173892974854, + "rewards/simpleverify_reward/mean": 0.6571568250656128, + "rewards/simpleverify_reward/std": 0.4746679365634918, + "step": 1377 + }, + { + "clip_ratio/high_max": 0.001786439632269321, + "clip_ratio/high_mean": 0.0006511402298201574, + "clip_ratio/low_mean": 0.000377095008843753, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010282352268404793, + "epoch": 14.317201166180759, + "grad_norm": 0.15552456676959991, + "learning_rate": 1e-06, + "loss": -0.044, + "step": 1378 + }, + { + "clip_ratio/high_max": 0.0018996015351149254, + "clip_ratio/high_mean": 0.0008098720663838321, + "clip_ratio/low_mean": 0.0003660948013930465, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011759668777813204, + "epoch": 14.326530612244898, + "grad_norm": 0.1618681401014328, + "learning_rate": 1e-06, + "loss": -0.0803, + "step": 1379 + }, + { + "clip_ratio/high_max": 0.0019648383240564726, + "clip_ratio/high_mean": 0.0007293696417036699, + "clip_ratio/low_mean": 0.00035925227757616085, + "clip_ratio/low_min": 1.5628907931386493e-05, + "clip_ratio/region_mean": 0.0010886219097301364, + "epoch": 14.335860058309038, + "grad_norm": 0.16378606855869293, + "learning_rate": 1e-06, + "loss": -0.0573, + "step": 1380 + }, + { + "clip_ratio/high_max": 0.0018145204485335853, + "clip_ratio/high_mean": 0.0006706941640004516, + "clip_ratio/low_mean": 0.0003518197438552306, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010225139230897184, + "epoch": 14.345189504373177, + "grad_norm": 0.13272294402122498, + "learning_rate": 1e-06, + "loss": -0.0506, + "step": 1381 + }, + { + "clip_ratio/high_max": 0.0017250901291845366, + "clip_ratio/high_mean": 0.0007156628980737878, + "clip_ratio/low_mean": 0.0004089378194294113, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011246007143199677, + "epoch": 14.354518950437317, + "grad_norm": 0.14052458107471466, + "learning_rate": 1e-06, + "loss": -0.0374, + "step": 1382 + }, + { + "clip_ratio/high_max": 0.002041730855125934, + "clip_ratio/high_mean": 0.0007437054646288743, + "clip_ratio/low_mean": 0.0005045933776273159, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012482988240662962, + "epoch": 14.363848396501458, + "grad_norm": 0.17788098752498627, + "learning_rate": 1e-06, + "loss": 0.0069, + "step": 1383 + }, + { + "clip_ratio/high_max": 0.002104928469634615, + "clip_ratio/high_mean": 0.0007452774207195034, + "clip_ratio/low_mean": 0.0005182259255889221, + "clip_ratio/low_min": 2.5830004233284853e-05, + "clip_ratio/region_mean": 0.0012635033381229732, + "epoch": 14.373177842565598, + "grad_norm": 0.17931057512760162, + "learning_rate": 1e-06, + "loss": -0.0405, + "step": 1384 + }, + { + "clip_ratio/high_max": 0.0022468803799711168, + "clip_ratio/high_mean": 0.0007708484299655538, + "clip_ratio/low_mean": 0.00039266398516701884, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011635124210442882, + "epoch": 14.382507288629737, + "grad_norm": 0.16331049799919128, + "learning_rate": 1e-06, + "loss": -0.0562, + "step": 1385 + }, + { + "clip_ratio/high_max": 0.002127086067048367, + "clip_ratio/high_mean": 0.000811380059531075, + "clip_ratio/low_mean": 0.0005335072269190277, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013448872887238394, + "epoch": 14.391836734693877, + "grad_norm": 0.1949557065963745, + "learning_rate": 1e-06, + "loss": -0.0606, + "step": 1386 + }, + { + "clip_ratio/high_max": 0.0019874968929798342, + "clip_ratio/high_mean": 0.0007955819983180845, + "clip_ratio/low_mean": 0.0005172160235815682, + "clip_ratio/low_min": 1.713267556624487e-05, + "clip_ratio/region_mean": 0.001312798045546515, + "epoch": 14.401166180758018, + "grad_norm": 0.16476590931415558, + "learning_rate": 1e-06, + "loss": -0.0652, + "step": 1387 + }, + { + "clip_ratio/high_max": 0.0023038855761114974, + "clip_ratio/high_mean": 0.0009133996863965876, + "clip_ratio/low_mean": 0.0005296056394854531, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014430053379328456, + "epoch": 14.410495626822158, + "grad_norm": 0.1466214656829834, + "learning_rate": 1e-06, + "loss": -0.0357, + "step": 1388 + }, + { + "clip_ratio/high_max": 0.002041217085206881, + "clip_ratio/high_mean": 0.0008059848660195712, + "clip_ratio/low_mean": 0.0005780181381851435, + "clip_ratio/low_min": 1.3097233932057861e-05, + "clip_ratio/region_mean": 0.0013840030296705663, + "epoch": 14.419825072886297, + "grad_norm": 0.1603439897298813, + "learning_rate": 1e-06, + "loss": -0.029, + "step": 1389 + }, + { + "clip_ratio/high_max": 0.0022164858019095846, + "clip_ratio/high_mean": 0.0008501516831529443, + "clip_ratio/low_mean": 0.0006408127346730907, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014909643723513, + "epoch": 14.429154518950437, + "grad_norm": 0.16874945163726807, + "learning_rate": 1e-06, + "loss": -0.0158, + "step": 1390 + }, + { + "clip_ratio/high_max": 0.0021246844407869503, + "clip_ratio/high_mean": 0.0008615278256911552, + "clip_ratio/low_mean": 0.0006316156723187305, + "clip_ratio/low_min": 3.2260839361697435e-05, + "clip_ratio/region_mean": 0.0014931435143807903, + "epoch": 14.438483965014576, + "grad_norm": 0.1571662575006485, + "learning_rate": 1e-06, + "loss": -0.0419, + "step": 1391 + }, + { + "clip_ratio/high_max": 0.0020351516250229906, + "clip_ratio/high_mean": 0.0007117465365809039, + "clip_ratio/low_mean": 0.0007588785028929124, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014706250276503852, + "epoch": 14.447813411078718, + "grad_norm": 0.20177142322063446, + "learning_rate": 1e-06, + "loss": 0.0545, + "step": 1392 + }, + { + "clip_ratio/high_max": 0.0027564527263166383, + "clip_ratio/high_mean": 0.0010131545877811732, + "clip_ratio/low_mean": 0.0007468863486792543, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017600409373699222, + "epoch": 14.457142857142857, + "grad_norm": 0.20664866268634796, + "learning_rate": 1e-06, + "loss": -0.0576, + "step": 1393 + }, + { + "clip_ratio/high_max": 0.0023366313107544556, + "clip_ratio/high_mean": 0.000984400196102797, + "clip_ratio/low_mean": 0.00073258942393295, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017169896818813868, + "epoch": 14.466472303206997, + "grad_norm": 0.15278011560440063, + "learning_rate": 1e-06, + "loss": -0.0363, + "step": 1394 + }, + { + "clip_ratio/high_max": 0.0024514257893315516, + "clip_ratio/high_mean": 0.0009990524886234198, + "clip_ratio/low_mean": 0.0005893639181522303, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015884164204180706, + "epoch": 14.475801749271136, + "grad_norm": 0.15815916657447815, + "learning_rate": 1e-06, + "loss": -0.0869, + "step": 1395 + }, + { + "clip_ratio/high_max": 0.0022904781653778628, + "clip_ratio/high_mean": 0.00089939329882327, + "clip_ratio/low_mean": 0.0007771529744786676, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001676546293310821, + "epoch": 14.485131195335278, + "grad_norm": 0.16466347873210907, + "learning_rate": 1e-06, + "loss": -0.0243, + "step": 1396 + }, + { + "clip_ratio/high_max": 0.0023122845523175783, + "clip_ratio/high_mean": 0.0008836859469738556, + "clip_ratio/low_mean": 0.0006575505867658649, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015412365246447735, + "epoch": 14.494460641399417, + "grad_norm": 0.1602303832769394, + "learning_rate": 1e-06, + "loss": 0.0024, + "step": 1397 + }, + { + "clip_ratio/high_max": 0.0022690760961268097, + "clip_ratio/high_mean": 0.0008907983501558192, + "clip_ratio/low_mean": 0.0007349894349317765, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001625787750526797, + "epoch": 14.503790087463557, + "grad_norm": 0.17472094297409058, + "learning_rate": 1e-06, + "loss": -0.0504, + "step": 1398 + }, + { + "clip_ratio/high_max": 0.002355555865506176, + "clip_ratio/high_mean": 0.0009016182575578569, + "clip_ratio/low_mean": 0.0007545262560597621, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016561444717808627, + "epoch": 14.513119533527696, + "grad_norm": 0.209573432803154, + "learning_rate": 1e-06, + "loss": -0.0315, + "step": 1399 + }, + { + "clip_ratio/high_max": 0.002152741792087909, + "clip_ratio/high_mean": 0.0009140385282080388, + "clip_ratio/low_mean": 0.000828500468742277, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001742538977850927, + "epoch": 14.522448979591836, + "grad_norm": 0.15524740517139435, + "learning_rate": 1e-06, + "loss": -0.0312, + "step": 1400 + }, + { + "clip_ratio/high_max": 0.002396453226538142, + "clip_ratio/high_mean": 0.0009471032299188664, + "clip_ratio/low_mean": 0.0008003411373920244, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001747444344800897, + "epoch": 14.531778425655977, + "grad_norm": 0.15095356106758118, + "learning_rate": 1e-06, + "loss": -0.033, + "step": 1401 + }, + { + "clip_ratio/high_max": 0.0026893775029748213, + "clip_ratio/high_mean": 0.0011095104559899482, + "clip_ratio/low_mean": 0.000741342244509724, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018508526918594725, + "epoch": 14.541107871720117, + "grad_norm": 0.16207027435302734, + "learning_rate": 1e-06, + "loss": -0.0258, + "step": 1402 + }, + { + "clip_ratio/high_max": 0.002336189449124504, + "clip_ratio/high_mean": 0.0010394274722784758, + "clip_ratio/low_mean": 0.0008122254566842457, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018516529598855413, + "epoch": 14.550437317784256, + "grad_norm": 0.16433963179588318, + "learning_rate": 1e-06, + "loss": -0.1248, + "step": 1403 + }, + { + "clip_ratio/high_max": 0.0024447282921755686, + "clip_ratio/high_mean": 0.0010036249555014365, + "clip_ratio/low_mean": 0.0008792593635007506, + "clip_ratio/low_min": 3.404757444513962e-05, + "clip_ratio/region_mean": 0.0018828843094524927, + "epoch": 14.559766763848396, + "grad_norm": 0.7684108018875122, + "learning_rate": 1e-06, + "loss": -0.0189, + "step": 1404 + }, + { + "clip_ratio/high_max": 0.0021612151176668704, + "clip_ratio/high_mean": 0.0009288510373153258, + "clip_ratio/low_mean": 0.0007889671178418212, + "clip_ratio/low_min": 3.855644536088221e-05, + "clip_ratio/region_mean": 0.0017178181442432106, + "epoch": 14.569096209912537, + "grad_norm": 0.15791532397270203, + "learning_rate": 1e-06, + "loss": -0.0711, + "step": 1405 + }, + { + "clip_ratio/high_max": 0.0031253333363565616, + "clip_ratio/high_mean": 0.0011079810101364274, + "clip_ratio/low_mean": 0.0008127800501824822, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019207610675948672, + "epoch": 14.578425655976677, + "grad_norm": 0.14668971300125122, + "learning_rate": 1e-06, + "loss": -0.062, + "step": 1406 + }, + { + "clip_ratio/high_max": 0.0027687831898219883, + "clip_ratio/high_mean": 0.0009778712283150526, + "clip_ratio/low_mean": 0.0007738993808743544, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017517706437502056, + "epoch": 14.587755102040816, + "grad_norm": 0.14398270845413208, + "learning_rate": 1e-06, + "loss": -0.04, + "step": 1407 + }, + { + "clip_ratio/high_max": 0.0026752289486466907, + "clip_ratio/high_mean": 0.0009468498101341538, + "clip_ratio/low_mean": 0.0006932692103873705, + "clip_ratio/low_min": 1.0568143807176966e-05, + "clip_ratio/region_mean": 0.0016401190259784926, + "epoch": 14.597084548104956, + "grad_norm": 0.1568804234266281, + "learning_rate": 1e-06, + "loss": -0.0588, + "step": 1408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0955636160714286, + "completions/max_length": 4096.0, + "completions/max_terminated_length": 4062.0, + "completions/mean_length": 861.3428344726562, + "completions/mean_terminated_length": 519.5657348632812, + "completions/min_length": 6.0, + "completions/min_terminated_length": 6.0, + "epoch": 14.606413994169095, + "grad_norm": 0.16777855157852173, + "learning_rate": 1e-06, + "loss": -0.0105, + "num_tokens": 796516440.0, + "reward": 0.664794921875, + "reward_std": 0.13781213760375977, + "rewards/simpleverify_reward/mean": 0.664794921875, + "rewards/simpleverify_reward/std": 0.4720703065395355, + "step": 1409 + }, + { + "clip_ratio/high_max": 0.001371023325191345, + "clip_ratio/high_mean": 0.0005251198062978801, + "clip_ratio/low_mean": 0.00037971350002408144, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009048333049577195, + "epoch": 14.615743440233237, + "grad_norm": 0.1606350988149643, + "learning_rate": 1e-06, + "loss": -0.0141, + "step": 1410 + }, + { + "clip_ratio/high_max": 0.002084295505483169, + "clip_ratio/high_mean": 0.000694572858265019, + "clip_ratio/low_mean": 0.00038588593542954186, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010804587818711298, + "epoch": 14.625072886297376, + "grad_norm": 0.15839152038097382, + "learning_rate": 1e-06, + "loss": -0.0144, + "step": 1411 + }, + { + "clip_ratio/high_max": 0.0017357024589728098, + "clip_ratio/high_mean": 0.0006295709554251516, + "clip_ratio/low_mean": 0.0003676174446809455, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009971884137485176, + "epoch": 14.634402332361516, + "grad_norm": 0.15450921654701233, + "learning_rate": 1e-06, + "loss": -0.0387, + "step": 1412 + }, + { + "clip_ratio/high_max": 0.0017522241450933507, + "clip_ratio/high_mean": 0.0007708320003985136, + "clip_ratio/low_mean": 0.00040037452549768204, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00117120652430458, + "epoch": 14.643731778425655, + "grad_norm": 0.15621048212051392, + "learning_rate": 1e-06, + "loss": -0.0716, + "step": 1413 + }, + { + "clip_ratio/high_max": 0.0018941683301818557, + "clip_ratio/high_mean": 0.0006009948883729521, + "clip_ratio/low_mean": 0.00038278234660538146, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.000983777252258733, + "epoch": 14.653061224489797, + "grad_norm": 0.7600335478782654, + "learning_rate": 1e-06, + "loss": -0.0209, + "step": 1414 + }, + { + "clip_ratio/high_max": 0.001902294548926875, + "clip_ratio/high_mean": 0.0007294602355614188, + "clip_ratio/low_mean": 0.0004647338855647831, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0011941941284021595, + "epoch": 14.662390670553936, + "grad_norm": 528.1058959960938, + "learning_rate": 1e-06, + "loss": 0.0261, + "step": 1415 + }, + { + "clip_ratio/high_max": 0.0018748468319245148, + "clip_ratio/high_mean": 0.0006354565593937878, + "clip_ratio/low_mean": 0.0003740206661859702, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001009477233310463, + "epoch": 14.671720116618076, + "grad_norm": 0.216713085770607, + "learning_rate": 1e-06, + "loss": -0.0705, + "step": 1416 + }, + { + "clip_ratio/high_max": 0.00203963217427372, + "clip_ratio/high_mean": 0.0007368754249910126, + "clip_ratio/low_mean": 0.0007297716183529701, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001466647026973078, + "epoch": 14.681049562682215, + "grad_norm": 0.16934724152088165, + "learning_rate": 1e-06, + "loss": -0.0254, + "step": 1417 + }, + { + "clip_ratio/high_max": 0.0020112539386900607, + "clip_ratio/high_mean": 0.0008417380413447972, + "clip_ratio/low_mean": 0.000479844017945652, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001321582036325708, + "epoch": 14.690379008746355, + "grad_norm": 0.17261309921741486, + "learning_rate": 1e-06, + "loss": -0.0922, + "step": 1418 + }, + { + "clip_ratio/high_max": 0.0021192739659454674, + "clip_ratio/high_mean": 0.0009254170618078206, + "clip_ratio/low_mean": 0.0005467834498631419, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014722004816576373, + "epoch": 14.699708454810496, + "grad_norm": 0.14940941333770752, + "learning_rate": 1e-06, + "loss": -0.0979, + "step": 1419 + }, + { + "clip_ratio/high_max": 0.0021527031349251047, + "clip_ratio/high_mean": 0.0008098544603853952, + "clip_ratio/low_mean": 0.0005650116188462562, + "clip_ratio/low_min": 9.181724635709543e-06, + "clip_ratio/region_mean": 0.001374866085825488, + "epoch": 14.709037900874636, + "grad_norm": 0.16518554091453552, + "learning_rate": 1e-06, + "loss": -0.0605, + "step": 1420 + }, + { + "clip_ratio/high_max": 0.0026311096371500753, + "clip_ratio/high_mean": 0.000915269018150866, + "clip_ratio/low_mean": 0.0005642396790790372, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0014795086863159668, + "epoch": 14.718367346938775, + "grad_norm": 0.15047743916511536, + "learning_rate": 1e-06, + "loss": -0.0613, + "step": 1421 + }, + { + "clip_ratio/high_max": 0.0022607879727729596, + "clip_ratio/high_mean": 0.0008101974126475397, + "clip_ratio/low_mean": 0.0005676412843058642, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013778386928606778, + "epoch": 14.727696793002915, + "grad_norm": 0.14523451030254364, + "learning_rate": 1e-06, + "loss": -0.0506, + "step": 1422 + }, + { + "clip_ratio/high_max": 0.0022824135594419204, + "clip_ratio/high_mean": 0.0009598317810741719, + "clip_ratio/low_mean": 0.00062423257077171, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015840643463889137, + "epoch": 14.737026239067056, + "grad_norm": 0.17060302197933197, + "learning_rate": 1e-06, + "loss": -0.0328, + "step": 1423 + }, + { + "clip_ratio/high_max": 0.0022096964603406377, + "clip_ratio/high_mean": 0.000983294781690347, + "clip_ratio/low_mean": 0.000659456734865671, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016427514929091558, + "epoch": 14.746355685131196, + "grad_norm": 0.17045511305332184, + "learning_rate": 1e-06, + "loss": -0.069, + "step": 1424 + }, + { + "clip_ratio/high_max": 0.002079459918604698, + "clip_ratio/high_mean": 0.000781764821113029, + "clip_ratio/low_mean": 0.0006046573339517636, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0013864221546100453, + "epoch": 14.755685131195335, + "grad_norm": 0.14913129806518555, + "learning_rate": 1e-06, + "loss": -0.0277, + "step": 1425 + }, + { + "clip_ratio/high_max": 0.0026301128673367202, + "clip_ratio/high_mean": 0.0009390130126121221, + "clip_ratio/low_mean": 0.0008366364982066443, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017756495144567452, + "epoch": 14.765014577259475, + "grad_norm": 0.17709794640541077, + "learning_rate": 1e-06, + "loss": -0.0301, + "step": 1426 + }, + { + "clip_ratio/high_max": 0.002419726486550644, + "clip_ratio/high_mean": 0.001029123322950909, + "clip_ratio/low_mean": 0.0007320142758544534, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001761137587891426, + "epoch": 14.774344023323614, + "grad_norm": 101.44778442382812, + "learning_rate": 1e-06, + "loss": -0.0231, + "step": 1427 + }, + { + "clip_ratio/high_max": 0.0025511273415759206, + "clip_ratio/high_mean": 0.0009499411698925542, + "clip_ratio/low_mean": 0.0006135852263469133, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015635264207958244, + "epoch": 14.783673469387756, + "grad_norm": 0.16930541396141052, + "learning_rate": 1e-06, + "loss": -0.0656, + "step": 1428 + }, + { + "clip_ratio/high_max": 0.0028346305116428994, + "clip_ratio/high_mean": 0.000991210403299192, + "clip_ratio/low_mean": 0.0007385825665551238, + "clip_ratio/low_min": 1.4789398846914992e-05, + "clip_ratio/region_mean": 0.001729792966216337, + "epoch": 14.793002915451895, + "grad_norm": 0.18170827627182007, + "learning_rate": 1e-06, + "loss": -0.0524, + "step": 1429 + }, + { + "clip_ratio/high_max": 0.0024006419407669455, + "clip_ratio/high_mean": 0.000863458439198439, + "clip_ratio/low_mean": 0.0007311092631425709, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015945676932460628, + "epoch": 14.802332361516035, + "grad_norm": 0.4562612473964691, + "learning_rate": 1e-06, + "loss": -0.0094, + "step": 1430 + }, + { + "clip_ratio/high_max": 0.002215027474449016, + "clip_ratio/high_mean": 0.0008813750164335943, + "clip_ratio/low_mean": 0.0008591822560219953, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017405572580173612, + "epoch": 14.811661807580174, + "grad_norm": 0.2189190536737442, + "learning_rate": 1e-06, + "loss": -0.0213, + "step": 1431 + }, + { + "clip_ratio/high_max": 0.002640083766891621, + "clip_ratio/high_mean": 0.000979864773398731, + "clip_ratio/low_mean": 0.0006226883015187923, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016025530530896503, + "epoch": 14.820991253644316, + "grad_norm": 0.13267762959003448, + "learning_rate": 1e-06, + "loss": -0.0534, + "step": 1432 + }, + { + "clip_ratio/high_max": 0.002463354256178718, + "clip_ratio/high_mean": 0.0009932165776262991, + "clip_ratio/low_mean": 0.0009031323679664638, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0018963489783345722, + "epoch": 14.830320699708455, + "grad_norm": 2.523435115814209, + "learning_rate": 1e-06, + "loss": -0.0491, + "step": 1433 + }, + { + "clip_ratio/high_max": 0.0026850850190385245, + "clip_ratio/high_mean": 0.0009352686010970501, + "clip_ratio/low_mean": 0.0007063232415021048, + "clip_ratio/low_min": 2.6829791750060394e-05, + "clip_ratio/region_mean": 0.0016415918653365225, + "epoch": 14.839650145772595, + "grad_norm": 0.3484272062778473, + "learning_rate": 1e-06, + "loss": -0.0161, + "step": 1434 + }, + { + "clip_ratio/high_max": 0.0025000876557896845, + "clip_ratio/high_mean": 0.0010419811460451456, + "clip_ratio/low_mean": 0.0006713745046909025, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017133556902990676, + "epoch": 14.848979591836734, + "grad_norm": 0.1631457507610321, + "learning_rate": 1e-06, + "loss": -0.0532, + "step": 1435 + }, + { + "clip_ratio/high_max": 0.0023943624473758973, + "clip_ratio/high_mean": 0.0009258485661121085, + "clip_ratio/low_mean": 0.0009387403497385094, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001864588906755671, + "epoch": 14.858309037900874, + "grad_norm": 0.1668511927127838, + "learning_rate": 1e-06, + "loss": -0.0054, + "step": 1436 + }, + { + "clip_ratio/high_max": 0.002261575617012568, + "clip_ratio/high_mean": 0.0009211516025970923, + "clip_ratio/low_mean": 0.0007193780056695687, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016405296300945338, + "epoch": 14.867638483965015, + "grad_norm": 0.14407925307750702, + "learning_rate": 1e-06, + "loss": -0.0579, + "step": 1437 + }, + { + "clip_ratio/high_max": 0.0024793751508696005, + "clip_ratio/high_mean": 0.0011043389058613684, + "clip_ratio/low_mean": 0.0008443706365142134, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019487095560180023, + "epoch": 14.876967930029155, + "grad_norm": 0.1657928228378296, + "learning_rate": 1e-06, + "loss": -0.0696, + "step": 1438 + }, + { + "clip_ratio/high_max": 0.0021965775558783207, + "clip_ratio/high_mean": 0.0009189502807203098, + "clip_ratio/low_mean": 0.001032198974826315, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0019511492391757201, + "epoch": 14.886297376093294, + "grad_norm": 0.20805539190769196, + "learning_rate": 1e-06, + "loss": -0.0308, + "step": 1439 + }, + { + "clip_ratio/high_max": 0.0024939083596109413, + "clip_ratio/high_mean": 0.0010812456621351885, + "clip_ratio/low_mean": 0.0010336533523513936, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021148989617358893, + "epoch": 14.895626822157434, + "grad_norm": 0.1499452441930771, + "learning_rate": 1e-06, + "loss": -0.0266, + "step": 1440 + }, + { + "epoch": 14.895626822157434, + "step": 1440, + "total_flos": 0.0, + "train_loss": -0.017012172211714947, + "train_runtime": 87883.361, + "train_samples_per_second": 16.313, + "train_steps_per_second": 0.018 + } + ], + "logging_steps": 1, + "max_steps": 1600, + "num_input_tokens_seen": 796516440, + "num_train_epochs": 15, + "save_steps": 160, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}