{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.895626822157434, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 605.2645263671875, "completions/mean_terminated_length": 553.8720092773438, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.009329446064139942, "grad_norm": 0.131331667304039, "learning_rate": 5e-07, "loss": 0.0129, "num_tokens": 18498440.0, "reward": 0.4915248453617096, "reward_std": 0.2574085593223572, "rewards/simpleverify_reward/mean": 0.4915248453617096, "rewards/simpleverify_reward/std": 0.49993693828582764, "step": 1 }, { "clip_ratio/high_max": 0.0023216921763378195, "clip_ratio/high_mean": 0.0010302629816578701, "clip_ratio/low_mean": 0.0006802254629292293, "clip_ratio/low_min": 6.060530540707987e-05, "clip_ratio/region_mean": 0.0017104884682339616, "epoch": 0.018658892128279883, "grad_norm": 0.14236930012702942, "learning_rate": 5e-07, "loss": -0.0041, "step": 2 }, { "clip_ratio/high_max": 0.0017581237698323093, "clip_ratio/high_mean": 0.0008538106085325126, "clip_ratio/low_mean": 0.0005615903714897286, "clip_ratio/low_min": 0.00010988541907863691, "clip_ratio/region_mean": 0.0014154009804769885, "epoch": 0.027988338192419825, "grad_norm": 0.15264558792114258, "learning_rate": 5e-07, "loss": 0.0142, "step": 3 }, { "clip_ratio/high_max": 0.002595315956568811, "clip_ratio/high_mean": 0.0011790855460276362, "clip_ratio/low_mean": 0.0007246428649523295, "clip_ratio/low_min": 3.706587085616775e-05, "clip_ratio/region_mean": 0.0019037284218939021, "epoch": 0.037317784256559766, "grad_norm": 0.15370598435401917, "learning_rate": 5e-07, "loss": -0.0071, "step": 4 }, { "clip_ratio/high_max": 0.0025825949051068164, "clip_ratio/high_mean": 0.0012525091515271924, "clip_ratio/low_mean": 0.000688050586177269, "clip_ratio/low_min": 1.899118797155097e-05, "clip_ratio/region_mean": 0.0019405597195145674, "epoch": 0.04664723032069971, "grad_norm": 0.14169178903102875, "learning_rate": 5e-07, "loss": -0.0737, "step": 5 }, { "clip_ratio/high_max": 0.002195987217419315, "clip_ratio/high_mean": 0.0010251491039525717, "clip_ratio/low_mean": 0.0008038162304728758, "clip_ratio/low_min": 0.0001338402089459123, "clip_ratio/region_mean": 0.0018289653526153415, "epoch": 0.05597667638483965, "grad_norm": 0.12938429415225983, "learning_rate": 5e-07, "loss": 0.0499, "step": 6 }, { "clip_ratio/high_max": 0.002393529241089709, "clip_ratio/high_mean": 0.0011856687306135427, "clip_ratio/low_mean": 0.0007589048618683591, "clip_ratio/low_min": 6.607858631468844e-05, "clip_ratio/region_mean": 0.0019445735961198807, "epoch": 0.0653061224489796, "grad_norm": 0.13213270902633667, "learning_rate": 5e-07, "loss": -0.0168, "step": 7 }, { "clip_ratio/high_max": 0.0027503964738571085, "clip_ratio/high_mean": 0.0012076203020114917, "clip_ratio/low_mean": 0.0006987634174038249, "clip_ratio/low_min": 3.70700536223012e-05, "clip_ratio/region_mean": 0.0019063837171415798, "epoch": 0.07463556851311953, "grad_norm": 0.13935300707817078, "learning_rate": 5e-07, "loss": -0.0396, "step": 8 }, { "clip_ratio/high_max": 0.002375476913584862, "clip_ratio/high_mean": 0.0011112253014289308, "clip_ratio/low_mean": 0.0010519421848584898, "clip_ratio/low_min": 0.00015046947737573646, "clip_ratio/region_mean": 0.002163167497201357, "epoch": 0.08396501457725948, "grad_norm": 0.1288035660982132, "learning_rate": 5e-07, "loss": -0.0015, "step": 9 }, { "clip_ratio/high_max": 0.0026818336991709657, "clip_ratio/high_mean": 0.001196322471514577, "clip_ratio/low_mean": 0.0012377222228678875, "clip_ratio/low_min": 0.00019973841699538752, "clip_ratio/region_mean": 0.0024340447125723585, "epoch": 0.09329446064139942, "grad_norm": 0.11253558844327927, "learning_rate": 5e-07, "loss": 0.0308, "step": 10 }, { "clip_ratio/high_max": 0.0023665463959332556, "clip_ratio/high_mean": 0.001151929875049973, "clip_ratio/low_mean": 0.0011318481847411022, "clip_ratio/low_min": 0.00010539333561609965, "clip_ratio/region_mean": 0.002283778041601181, "epoch": 0.10262390670553936, "grad_norm": 0.11897031962871552, "learning_rate": 5e-07, "loss": -0.0076, "step": 11 }, { "clip_ratio/high_max": 0.0025946504320017993, "clip_ratio/high_mean": 0.0012229333442519419, "clip_ratio/low_mean": 0.001451674776035361, "clip_ratio/low_min": 6.885036964376923e-05, "clip_ratio/region_mean": 0.0026746081493911333, "epoch": 0.1119533527696793, "grad_norm": 0.12915368378162384, "learning_rate": 5e-07, "loss": -0.0002, "step": 12 }, { "clip_ratio/high_max": 0.0029937578292447142, "clip_ratio/high_mean": 0.001326830155448988, "clip_ratio/low_mean": 0.001459045412047999, "clip_ratio/low_min": 0.00027211164433538215, "clip_ratio/region_mean": 0.0027858755784109235, "epoch": 0.12128279883381925, "grad_norm": 0.1449378877878189, "learning_rate": 5e-07, "loss": 0.0467, "step": 13 }, { "clip_ratio/high_max": 0.003035556663235184, "clip_ratio/high_mean": 0.0013858933998562861, "clip_ratio/low_mean": 0.001356685988866957, "clip_ratio/low_min": 0.00023836017589928815, "clip_ratio/region_mean": 0.0027425793959992006, "epoch": 0.1306122448979592, "grad_norm": 0.1509605497121811, "learning_rate": 5e-07, "loss": -0.0043, "step": 14 }, { "clip_ratio/high_max": 0.0025981081562349573, "clip_ratio/high_mean": 0.001216873512021266, "clip_ratio/low_mean": 0.0012306782246014336, "clip_ratio/low_min": 0.00011819705105153844, "clip_ratio/region_mean": 0.0024475518075632863, "epoch": 0.13994169096209913, "grad_norm": 0.1347721964120865, "learning_rate": 5e-07, "loss": -0.0396, "step": 15 }, { "clip_ratio/high_max": 0.002757471753284335, "clip_ratio/high_mean": 0.0012285461998544633, "clip_ratio/low_mean": 0.0012139716127421707, "clip_ratio/low_min": 0.00024589502754679415, "clip_ratio/region_mean": 0.0024425178271485493, "epoch": 0.14927113702623906, "grad_norm": 0.12649588286876678, "learning_rate": 5e-07, "loss": 0.0083, "step": 16 }, { "clip_ratio/high_max": 0.003136303828796372, "clip_ratio/high_mean": 0.00125961730373092, "clip_ratio/low_mean": 0.001337776477157604, "clip_ratio/low_min": 0.00019038475602428662, "clip_ratio/region_mean": 0.0025973937517846934, "epoch": 0.158600583090379, "grad_norm": 0.14974616467952728, "learning_rate": 5e-07, "loss": 0.0397, "step": 17 }, { "clip_ratio/high_max": 0.002668715394975152, "clip_ratio/high_mean": 0.0012346713338047266, "clip_ratio/low_mean": 0.0012769183376803994, "clip_ratio/low_min": 0.0002224307345386478, "clip_ratio/region_mean": 0.002511589635105338, "epoch": 0.16793002915451896, "grad_norm": 0.1325722187757492, "learning_rate": 5e-07, "loss": 0.0178, "step": 18 }, { "clip_ratio/high_max": 0.0024351933534489945, "clip_ratio/high_mean": 0.0010669569546735147, "clip_ratio/low_mean": 0.001265330927708419, "clip_ratio/low_min": 0.00024171168752218364, "clip_ratio/region_mean": 0.002332287847821135, "epoch": 0.1772594752186589, "grad_norm": 0.13122612237930298, "learning_rate": 5e-07, "loss": 0.0152, "step": 19 }, { "clip_ratio/high_max": 0.0029950038515380584, "clip_ratio/high_mean": 0.0012908562057418749, "clip_ratio/low_mean": 0.0012448179913917556, "clip_ratio/low_min": 0.0002158871766368975, "clip_ratio/region_mean": 0.002535674204409588, "epoch": 0.18658892128279883, "grad_norm": 0.12613993883132935, "learning_rate": 5e-07, "loss": 0.0151, "step": 20 }, { "clip_ratio/high_max": 0.0029358126848819666, "clip_ratio/high_mean": 0.0013152424689906184, "clip_ratio/low_mean": 0.001125064260122599, "clip_ratio/low_min": 7.533405187132303e-05, "clip_ratio/region_mean": 0.0024403066854574718, "epoch": 0.19591836734693877, "grad_norm": 0.13033734261989594, "learning_rate": 5e-07, "loss": -0.0312, "step": 21 }, { "clip_ratio/high_max": 0.0025670386603451334, "clip_ratio/high_mean": 0.001076224823918892, "clip_ratio/low_mean": 0.0011356466802681098, "clip_ratio/low_min": 0.00017650879726716084, "clip_ratio/region_mean": 0.0022118715569376945, "epoch": 0.20524781341107873, "grad_norm": 0.11890558898448944, "learning_rate": 5e-07, "loss": 0.0021, "step": 22 }, { "clip_ratio/high_max": 0.0024367794394493103, "clip_ratio/high_mean": 0.001170948879007483, "clip_ratio/low_mean": 0.0010662934619176667, "clip_ratio/low_min": 6.372660027409438e-05, "clip_ratio/region_mean": 0.0022372423190972768, "epoch": 0.21457725947521866, "grad_norm": 0.12802287936210632, "learning_rate": 5e-07, "loss": -0.0113, "step": 23 }, { "clip_ratio/high_max": 0.0024836100565153174, "clip_ratio/high_mean": 0.0010852228515432216, "clip_ratio/low_mean": 0.0009051441447809339, "clip_ratio/low_min": 6.720360215695109e-05, "clip_ratio/region_mean": 0.001990367039979901, "epoch": 0.2239067055393586, "grad_norm": 0.12940162420272827, "learning_rate": 5e-07, "loss": 0.0071, "step": 24 }, { "clip_ratio/high_max": 0.002292777666298207, "clip_ratio/high_mean": 0.0010893497892539017, "clip_ratio/low_mean": 0.001047341116645839, "clip_ratio/low_min": 4.6377492253668606e-05, "clip_ratio/region_mean": 0.0021366909058997408, "epoch": 0.23323615160349853, "grad_norm": 0.13016048073768616, "learning_rate": 5e-07, "loss": -0.0647, "step": 25 }, { "clip_ratio/high_max": 0.0023171197462943383, "clip_ratio/high_mean": 0.0010916536557488143, "clip_ratio/low_mean": 0.0011160504018334905, "clip_ratio/low_min": 0.0001997196350203012, "clip_ratio/region_mean": 0.0022077040557633154, "epoch": 0.2425655976676385, "grad_norm": 0.12679563462734222, "learning_rate": 5e-07, "loss": 0.0186, "step": 26 }, { "clip_ratio/high_max": 0.002647667955898214, "clip_ratio/high_mean": 0.0011598637283896096, "clip_ratio/low_mean": 0.0008844033618515823, "clip_ratio/low_min": 8.8138324827014e-05, "clip_ratio/region_mean": 0.002044267072051298, "epoch": 0.2518950437317784, "grad_norm": 0.11534130573272705, "learning_rate": 5e-07, "loss": -0.0231, "step": 27 }, { "clip_ratio/high_max": 0.002619112747197505, "clip_ratio/high_mean": 0.0011757613283407409, "clip_ratio/low_mean": 0.0010475817871338222, "clip_ratio/low_min": 5.176036665943684e-05, "clip_ratio/region_mean": 0.0022233431300264783, "epoch": 0.2612244897959184, "grad_norm": 0.12745560705661774, "learning_rate": 5e-07, "loss": -0.0184, "step": 28 }, { "clip_ratio/high_max": 0.002387891450780444, "clip_ratio/high_mean": 0.001015724064927781, "clip_ratio/low_mean": 0.0010694482352846535, "clip_ratio/low_min": 0.00011374177120160311, "clip_ratio/region_mean": 0.002085172280203551, "epoch": 0.2705539358600583, "grad_norm": 0.11388033628463745, "learning_rate": 5e-07, "loss": 0.0057, "step": 29 }, { "clip_ratio/high_max": 0.0024310967419296503, "clip_ratio/high_mean": 0.0011102042844868265, "clip_ratio/low_mean": 0.0010822213280334836, "clip_ratio/low_min": 0.00022650255141343223, "clip_ratio/region_mean": 0.002192425621615257, "epoch": 0.27988338192419826, "grad_norm": 0.11966362595558167, "learning_rate": 5e-07, "loss": -0.0071, "step": 30 }, { "clip_ratio/high_max": 0.0022376261549652554, "clip_ratio/high_mean": 0.001052344472554978, "clip_ratio/low_mean": 0.0010792531866172794, "clip_ratio/low_min": 7.535982877016068e-05, "clip_ratio/region_mean": 0.002131597626430448, "epoch": 0.2892128279883382, "grad_norm": 0.12972787022590637, "learning_rate": 5e-07, "loss": 0.0254, "step": 31 }, { "clip_ratio/high_max": 0.002377464625169523, "clip_ratio/high_mean": 0.0011515830410644412, "clip_ratio/low_mean": 0.0009880590623652097, "clip_ratio/low_min": 7.233149517560378e-05, "clip_ratio/region_mean": 0.0021396420488599688, "epoch": 0.29854227405247813, "grad_norm": 0.1271960586309433, "learning_rate": 5e-07, "loss": 0.0014, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014334542410714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 603.6301879882812, "completions/mean_terminated_length": 552.840576171875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30787172011661806, "grad_norm": 0.14079268276691437, "learning_rate": 5e-07, "loss": 0.0364, "num_tokens": 36959676.0, "reward": 0.5126255750656128, "reward_std": 0.25040188431739807, "rewards/simpleverify_reward/mean": 0.5126255750656128, "rewards/simpleverify_reward/std": 0.4998493194580078, "step": 33 }, { "clip_ratio/high_max": 0.0022863421181682497, "clip_ratio/high_mean": 0.0009586980850144755, "clip_ratio/low_mean": 0.0006349682525979006, "clip_ratio/low_min": 1.3380432392295916e-05, "clip_ratio/region_mean": 0.0015936663694446906, "epoch": 0.317201166180758, "grad_norm": 0.14034491777420044, "learning_rate": 5e-07, "loss": 0.0278, "step": 34 }, { "clip_ratio/high_max": 0.002196236397139728, "clip_ratio/high_mean": 0.0009944097873813007, "clip_ratio/low_mean": 0.0006592385107069276, "clip_ratio/low_min": 3.9299935451708734e-05, "clip_ratio/region_mean": 0.001653648287174292, "epoch": 0.32653061224489793, "grad_norm": 0.1371551901102066, "learning_rate": 5e-07, "loss": 0.0017, "step": 35 }, { "clip_ratio/high_max": 0.002005641072173603, "clip_ratio/high_mean": 0.0009586297655914677, "clip_ratio/low_mean": 0.0007999000681593316, "clip_ratio/low_min": 5.318755756889004e-05, "clip_ratio/region_mean": 0.0017585298555786721, "epoch": 0.3358600583090379, "grad_norm": 0.13138970732688904, "learning_rate": 5e-07, "loss": 0.0062, "step": 36 }, { "clip_ratio/high_max": 0.0024777424914645962, "clip_ratio/high_mean": 0.0010102005107910372, "clip_ratio/low_mean": 0.0006888644111313624, "clip_ratio/low_min": 6.477101669588592e-05, "clip_ratio/region_mean": 0.0016990648946375586, "epoch": 0.34518950437317786, "grad_norm": 0.1383255422115326, "learning_rate": 5e-07, "loss": 0.0186, "step": 37 }, { "clip_ratio/high_max": 0.0019921148268622346, "clip_ratio/high_mean": 0.0008962915853771847, "clip_ratio/low_mean": 0.0007489126364816912, "clip_ratio/low_min": 4.9105203288490884e-05, "clip_ratio/region_mean": 0.0016452042400487699, "epoch": 0.3545189504373178, "grad_norm": 0.144417867064476, "learning_rate": 5e-07, "loss": 0.0211, "step": 38 }, { "clip_ratio/high_max": 0.0019493213840178214, "clip_ratio/high_mean": 0.0008399857815675205, "clip_ratio/low_mean": 0.0007804024189681513, "clip_ratio/low_min": 6.168910749693168e-05, "clip_ratio/region_mean": 0.0016203882041736506, "epoch": 0.3638483965014577, "grad_norm": 0.13966616988182068, "learning_rate": 5e-07, "loss": 0.0148, "step": 39 }, { "clip_ratio/high_max": 0.0021811831175000407, "clip_ratio/high_mean": 0.0009747384101501666, "clip_ratio/low_mean": 0.0008429400222667027, "clip_ratio/low_min": 7.595140232297126e-05, "clip_ratio/region_mean": 0.0018176784142269753, "epoch": 0.37317784256559766, "grad_norm": 0.11899567395448685, "learning_rate": 5e-07, "loss": 0.0138, "step": 40 }, { "clip_ratio/high_max": 0.002654959745996166, "clip_ratio/high_mean": 0.001108158627175726, "clip_ratio/low_mean": 0.000936737311349134, "clip_ratio/low_min": 8.707926645001862e-05, "clip_ratio/region_mean": 0.0020448959330678917, "epoch": 0.3825072886297376, "grad_norm": 0.12759868800640106, "learning_rate": 5e-07, "loss": 0.0413, "step": 41 }, { "clip_ratio/high_max": 0.0022380856971722096, "clip_ratio/high_mean": 0.0010899479893851094, "clip_ratio/low_mean": 0.0009552496958349366, "clip_ratio/low_min": 0.0001674862596701132, "clip_ratio/region_mean": 0.0020451976379263215, "epoch": 0.39183673469387753, "grad_norm": 0.1302347630262375, "learning_rate": 5e-07, "loss": -0.0135, "step": 42 }, { "clip_ratio/high_max": 0.002379240388108883, "clip_ratio/high_mean": 0.0010375067176937591, "clip_ratio/low_mean": 0.001095101371902274, "clip_ratio/low_min": 0.00018029624880000483, "clip_ratio/region_mean": 0.0021326081186998636, "epoch": 0.40116618075801747, "grad_norm": 0.12419261038303375, "learning_rate": 5e-07, "loss": 0.0216, "step": 43 }, { "clip_ratio/high_max": 0.0030644627768197097, "clip_ratio/high_mean": 0.0013917084725107998, "clip_ratio/low_mean": 0.001085498672182439, "clip_ratio/low_min": 0.00010210959044343326, "clip_ratio/region_mean": 0.0024772071628831327, "epoch": 0.41049562682215746, "grad_norm": 0.13948874175548553, "learning_rate": 5e-07, "loss": -0.011, "step": 44 }, { "clip_ratio/high_max": 0.002880107469536597, "clip_ratio/high_mean": 0.0012506534239946632, "clip_ratio/low_mean": 0.001078390338079771, "clip_ratio/low_min": 0.00021389202720456524, "clip_ratio/region_mean": 0.0023290437893592753, "epoch": 0.4198250728862974, "grad_norm": 0.1368735432624817, "learning_rate": 5e-07, "loss": 0.0185, "step": 45 }, { "clip_ratio/high_max": 0.003029709194379393, "clip_ratio/high_mean": 0.001400678011123091, "clip_ratio/low_mean": 0.0009860531263257144, "clip_ratio/low_min": 0.00012388800587359583, "clip_ratio/region_mean": 0.0023867311465437524, "epoch": 0.4291545189504373, "grad_norm": 0.14016173779964447, "learning_rate": 5e-07, "loss": -0.0038, "step": 46 }, { "clip_ratio/high_max": 0.002539137363783084, "clip_ratio/high_mean": 0.001206661203468684, "clip_ratio/low_mean": 0.0011205441733181942, "clip_ratio/low_min": 5.020544085709844e-05, "clip_ratio/region_mean": 0.0023272054313565604, "epoch": 0.43848396501457726, "grad_norm": 0.13392040133476257, "learning_rate": 5e-07, "loss": -0.0018, "step": 47 }, { "clip_ratio/high_max": 0.002793367821141146, "clip_ratio/high_mean": 0.0011795473146776203, "clip_ratio/low_mean": 0.0010273263924318599, "clip_ratio/low_min": 0.00012425377462932374, "clip_ratio/region_mean": 0.002206873701652512, "epoch": 0.4478134110787172, "grad_norm": 0.1314721405506134, "learning_rate": 5e-07, "loss": -0.0178, "step": 48 }, { "clip_ratio/high_max": 0.002273077145218849, "clip_ratio/high_mean": 0.0011841790310427314, "clip_ratio/low_mean": 0.001236245112522738, "clip_ratio/low_min": 0.0002435972119201324, "clip_ratio/region_mean": 0.002420424105366692, "epoch": 0.45714285714285713, "grad_norm": 0.1245843768119812, "learning_rate": 5e-07, "loss": -0.0249, "step": 49 }, { "clip_ratio/high_max": 0.0023852536542108282, "clip_ratio/high_mean": 0.001169397230114555, "clip_ratio/low_mean": 0.0011542706233740319, "clip_ratio/low_min": 0.0002012195654970128, "clip_ratio/region_mean": 0.0023236678607645445, "epoch": 0.46647230320699706, "grad_norm": 0.1297774463891983, "learning_rate": 5e-07, "loss": -0.0011, "step": 50 }, { "clip_ratio/high_max": 0.002454962937918026, "clip_ratio/high_mean": 0.0011786022150772624, "clip_ratio/low_mean": 0.0011728753706847783, "clip_ratio/low_min": 5.601863449555822e-05, "clip_ratio/region_mean": 0.00235147761850385, "epoch": 0.47580174927113705, "grad_norm": 0.12858253717422485, "learning_rate": 5e-07, "loss": -0.0036, "step": 51 }, { "clip_ratio/high_max": 0.00271710789820645, "clip_ratio/high_mean": 0.001267463456315454, "clip_ratio/low_mean": 0.001037508443914703, "clip_ratio/low_min": 0.00019888698261638638, "clip_ratio/region_mean": 0.0023049718874972314, "epoch": 0.485131195335277, "grad_norm": 0.13503322005271912, "learning_rate": 5e-07, "loss": 0.0125, "step": 52 }, { "clip_ratio/high_max": 0.002364695057622157, "clip_ratio/high_mean": 0.0011190052500751335, "clip_ratio/low_mean": 0.0010465662635397166, "clip_ratio/low_min": 6.342427241179394e-05, "clip_ratio/region_mean": 0.0021655715027009137, "epoch": 0.4944606413994169, "grad_norm": 0.1131204217672348, "learning_rate": 5e-07, "loss": -0.0305, "step": 53 }, { "clip_ratio/high_max": 0.002527049553464167, "clip_ratio/high_mean": 0.0009720858834043611, "clip_ratio/low_mean": 0.0011290208676655311, "clip_ratio/low_min": 0.00014344586088554934, "clip_ratio/region_mean": 0.0021011067292420194, "epoch": 0.5037900874635568, "grad_norm": 0.12126728892326355, "learning_rate": 5e-07, "loss": 0.0093, "step": 54 }, { "clip_ratio/high_max": 0.00241242851188872, "clip_ratio/high_mean": 0.0010888299129874213, "clip_ratio/low_mean": 0.0010215321308351122, "clip_ratio/low_min": 7.989729419932701e-05, "clip_ratio/region_mean": 0.0021103620310896076, "epoch": 0.5131195335276968, "grad_norm": 0.1308501958847046, "learning_rate": 5e-07, "loss": -0.0444, "step": 55 }, { "clip_ratio/high_max": 0.0026343139761593193, "clip_ratio/high_mean": 0.0011247241236560512, "clip_ratio/low_mean": 0.0010733525959949475, "clip_ratio/low_min": 0.00014605376145482296, "clip_ratio/region_mean": 0.002198076675995253, "epoch": 0.5224489795918368, "grad_norm": 0.11449221521615982, "learning_rate": 5e-07, "loss": 0.011, "step": 56 }, { "clip_ratio/high_max": 0.0023245855991262943, "clip_ratio/high_mean": 0.0010635077560436912, "clip_ratio/low_mean": 0.0011616673473326955, "clip_ratio/low_min": 0.0001502650247857673, "clip_ratio/region_mean": 0.002225175136118196, "epoch": 0.5317784256559767, "grad_norm": 0.11965508759021759, "learning_rate": 5e-07, "loss": 0.0299, "step": 57 }, { "clip_ratio/high_max": 0.002464928744302597, "clip_ratio/high_mean": 0.0010313002348993905, "clip_ratio/low_mean": 0.0011650233282125555, "clip_ratio/low_min": 7.162451856856933e-05, "clip_ratio/region_mean": 0.0021963235340081155, "epoch": 0.5411078717201167, "grad_norm": 0.12807074189186096, "learning_rate": 5e-07, "loss": 0.0147, "step": 58 }, { "clip_ratio/high_max": 0.0023764401776134036, "clip_ratio/high_mean": 0.001060073253029259, "clip_ratio/low_mean": 0.0010700331840780564, "clip_ratio/low_min": 6.526444394694408e-05, "clip_ratio/region_mean": 0.002130106389813591, "epoch": 0.5504373177842565, "grad_norm": 0.12465574592351913, "learning_rate": 5e-07, "loss": -0.0502, "step": 59 }, { "clip_ratio/high_max": 0.002451460182783194, "clip_ratio/high_mean": 0.0011471950383565854, "clip_ratio/low_mean": 0.0009914712463796604, "clip_ratio/low_min": 0.00012610398789547617, "clip_ratio/region_mean": 0.0021386662920122035, "epoch": 0.5597667638483965, "grad_norm": 0.1245523989200592, "learning_rate": 5e-07, "loss": -0.0243, "step": 60 }, { "clip_ratio/high_max": 0.0025791477601160295, "clip_ratio/high_mean": 0.001038796155626187, "clip_ratio/low_mean": 0.0010771303859655745, "clip_ratio/low_min": 0.00012076884286216227, "clip_ratio/region_mean": 0.0021159264870220795, "epoch": 0.5690962099125364, "grad_norm": 0.13137593865394592, "learning_rate": 5e-07, "loss": 0.0286, "step": 61 }, { "clip_ratio/high_max": 0.002549487784563098, "clip_ratio/high_mean": 0.0012202230100228917, "clip_ratio/low_mean": 0.000920853952266043, "clip_ratio/low_min": 9.88389365375042e-05, "clip_ratio/region_mean": 0.0021410769622889347, "epoch": 0.5784256559766764, "grad_norm": 0.12125889956951141, "learning_rate": 5e-07, "loss": -0.0345, "step": 62 }, { "clip_ratio/high_max": 0.002603382323286496, "clip_ratio/high_mean": 0.0012414115990395658, "clip_ratio/low_mean": 0.0011167167431267444, "clip_ratio/low_min": 0.00016160727045644308, "clip_ratio/region_mean": 0.002358128287596628, "epoch": 0.5877551020408164, "grad_norm": 0.12610796093940735, "learning_rate": 5e-07, "loss": -0.0089, "step": 63 }, { "clip_ratio/high_max": 0.002190053026424721, "clip_ratio/high_mean": 0.0009910077551467111, "clip_ratio/low_mean": 0.001023258111672476, "clip_ratio/low_min": 9.429754936718382e-05, "clip_ratio/region_mean": 0.002014265875914134, "epoch": 0.5970845481049563, "grad_norm": 0.12715384364128113, "learning_rate": 5e-07, "loss": 0.002, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014229910714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 616.2706909179688, "completions/mean_terminated_length": 566.0396118164062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6064139941690962, "grad_norm": 0.14735741913318634, "learning_rate": 5e-07, "loss": 0.0212, "num_tokens": 55850443.0, "reward": 0.5286691188812256, "reward_std": 0.24445606768131256, "rewards/simpleverify_reward/mean": 0.5286690592765808, "rewards/simpleverify_reward/std": 0.49918612837791443, "step": 65 }, { "clip_ratio/high_max": 0.002269677206641063, "clip_ratio/high_mean": 0.0009126685108640231, "clip_ratio/low_mean": 0.0006784476499888115, "clip_ratio/low_min": 7.134479983506026e-05, "clip_ratio/region_mean": 0.0015911161754047498, "epoch": 0.6157434402332361, "grad_norm": 0.1250542402267456, "learning_rate": 5e-07, "loss": 0.037, "step": 66 }, { "clip_ratio/high_max": 0.00211200347985141, "clip_ratio/high_mean": 0.0010342555506213102, "clip_ratio/low_mean": 0.0005842460195708554, "clip_ratio/low_min": 2.756339563347865e-05, "clip_ratio/region_mean": 0.0016185015483642928, "epoch": 0.6250728862973761, "grad_norm": 0.1319248527288437, "learning_rate": 5e-07, "loss": -0.0336, "step": 67 }, { "clip_ratio/high_max": 0.0023904748159111477, "clip_ratio/high_mean": 0.0010492292676644865, "clip_ratio/low_mean": 0.0005754186204285361, "clip_ratio/low_min": 1.427592542313505e-05, "clip_ratio/region_mean": 0.001624647899006959, "epoch": 0.634402332361516, "grad_norm": 0.1214059591293335, "learning_rate": 5e-07, "loss": -0.0384, "step": 68 }, { "clip_ratio/high_max": 0.002319907165656332, "clip_ratio/high_mean": 0.0009754171369422693, "clip_ratio/low_mean": 0.0006642514363193186, "clip_ratio/low_min": 5.5770853577996604e-05, "clip_ratio/region_mean": 0.001639668582356535, "epoch": 0.643731778425656, "grad_norm": 0.12542420625686646, "learning_rate": 5e-07, "loss": 0.0149, "step": 69 }, { "clip_ratio/high_max": 0.0024787039510556497, "clip_ratio/high_mean": 0.0010657715447450755, "clip_ratio/low_mean": 0.0006974736588745145, "clip_ratio/low_min": 2.3256465283338912e-05, "clip_ratio/region_mean": 0.001763245221809484, "epoch": 0.6530612244897959, "grad_norm": 0.1362035721540451, "learning_rate": 5e-07, "loss": -0.0196, "step": 70 }, { "clip_ratio/high_max": 0.0024949344297056086, "clip_ratio/high_mean": 0.0009695752451079898, "clip_ratio/low_mean": 0.0008029742839426035, "clip_ratio/low_min": 1.3061650861345697e-05, "clip_ratio/region_mean": 0.0017725495126796886, "epoch": 0.6623906705539359, "grad_norm": 0.11836006492376328, "learning_rate": 5e-07, "loss": 0.016, "step": 71 }, { "clip_ratio/high_max": 0.002384097606409341, "clip_ratio/high_mean": 0.001066951623215573, "clip_ratio/low_mean": 0.0007494277015211992, "clip_ratio/low_min": 4.557683405437274e-05, "clip_ratio/region_mean": 0.0018163793574785814, "epoch": 0.6717201166180758, "grad_norm": 0.12951575219631195, "learning_rate": 5e-07, "loss": -0.0154, "step": 72 }, { "clip_ratio/high_max": 0.0021888353148824535, "clip_ratio/high_mean": 0.000916398394110729, "clip_ratio/low_mean": 0.0009493210309301503, "clip_ratio/low_min": 6.25697157374816e-05, "clip_ratio/region_mean": 0.0018657194086699747, "epoch": 0.6810495626822157, "grad_norm": 0.11974475532770157, "learning_rate": 5e-07, "loss": 0.0198, "step": 73 }, { "clip_ratio/high_max": 0.0025447066291235387, "clip_ratio/high_mean": 0.0012363185560388956, "clip_ratio/low_mean": 0.000958998833084479, "clip_ratio/low_min": 0.00012442229399312055, "clip_ratio/region_mean": 0.0021953174073132686, "epoch": 0.6903790087463557, "grad_norm": 0.13129617273807526, "learning_rate": 5e-07, "loss": -0.0281, "step": 74 }, { "clip_ratio/high_max": 0.0023129359469749033, "clip_ratio/high_mean": 0.001056255707226228, "clip_ratio/low_mean": 0.000935445506911492, "clip_ratio/low_min": 0.00011015579093509587, "clip_ratio/region_mean": 0.001991701174119953, "epoch": 0.6997084548104956, "grad_norm": 0.12138260900974274, "learning_rate": 5e-07, "loss": -0.0006, "step": 75 }, { "clip_ratio/high_max": 0.002366617205552757, "clip_ratio/high_mean": 0.0010631523655320052, "clip_ratio/low_mean": 0.0010049080428871093, "clip_ratio/low_min": 0.00015036267905088607, "clip_ratio/region_mean": 0.002068060355668422, "epoch": 0.7090379008746356, "grad_norm": 0.12002574652433395, "learning_rate": 5e-07, "loss": -0.005, "step": 76 }, { "clip_ratio/high_max": 0.0023247287535923533, "clip_ratio/high_mean": 0.0010074204983538948, "clip_ratio/low_mean": 0.0010972387572110165, "clip_ratio/low_min": 0.0001299779542023316, "clip_ratio/region_mean": 0.002104659222823102, "epoch": 0.7183673469387755, "grad_norm": 0.12160296738147736, "learning_rate": 5e-07, "loss": 0.0321, "step": 77 }, { "clip_ratio/high_max": 0.002664013998582959, "clip_ratio/high_mean": 0.0010896784733631648, "clip_ratio/low_mean": 0.0011854437871079426, "clip_ratio/low_min": 0.00022335676021612016, "clip_ratio/region_mean": 0.002275122213177383, "epoch": 0.7276967930029155, "grad_norm": 0.12966448068618774, "learning_rate": 5e-07, "loss": 0.0265, "step": 78 }, { "clip_ratio/high_max": 0.0027926598777412437, "clip_ratio/high_mean": 0.0012209811029606499, "clip_ratio/low_mean": 0.0010938578307104763, "clip_ratio/low_min": 0.00018711722441366874, "clip_ratio/region_mean": 0.002314839010068681, "epoch": 0.7370262390670554, "grad_norm": 0.13493461906909943, "learning_rate": 5e-07, "loss": -0.0212, "step": 79 }, { "clip_ratio/high_max": 0.002197874775447417, "clip_ratio/high_mean": 0.0009918822688632645, "clip_ratio/low_mean": 0.0011687487640301697, "clip_ratio/low_min": 5.7556905630917754e-05, "clip_ratio/region_mean": 0.0021606310110655613, "epoch": 0.7463556851311953, "grad_norm": 0.13200566172599792, "learning_rate": 5e-07, "loss": 0.0716, "step": 80 }, { "clip_ratio/high_max": 0.0025154613904305734, "clip_ratio/high_mean": 0.0012477115960791707, "clip_ratio/low_mean": 0.0013104054378345609, "clip_ratio/low_min": 0.0002499403944966616, "clip_ratio/region_mean": 0.0025581171066733077, "epoch": 0.7556851311953353, "grad_norm": 0.12989205121994019, "learning_rate": 5e-07, "loss": -0.0064, "step": 81 }, { "clip_ratio/high_max": 0.0025988262896134984, "clip_ratio/high_mean": 0.0011136246102978475, "clip_ratio/low_mean": 0.0011564869546418777, "clip_ratio/low_min": 0.00016451432657049736, "clip_ratio/region_mean": 0.0022701115449308418, "epoch": 0.7650145772594752, "grad_norm": 0.1321469396352768, "learning_rate": 5e-07, "loss": 0.0206, "step": 82 }, { "clip_ratio/high_max": 0.0024583666381658986, "clip_ratio/high_mean": 0.0011134094020235352, "clip_ratio/low_mean": 0.0013169339617888909, "clip_ratio/low_min": 8.830746628518682e-05, "clip_ratio/region_mean": 0.0024303433383465745, "epoch": 0.7743440233236152, "grad_norm": 0.13395744562149048, "learning_rate": 5e-07, "loss": 0.0291, "step": 83 }, { "clip_ratio/high_max": 0.0027450542293081526, "clip_ratio/high_mean": 0.0011149254132760689, "clip_ratio/low_mean": 0.0011469536511867773, "clip_ratio/low_min": 0.00024369885431951843, "clip_ratio/region_mean": 0.002261879089928698, "epoch": 0.7836734693877551, "grad_norm": 0.11095640808343887, "learning_rate": 5e-07, "loss": -0.0147, "step": 84 }, { "clip_ratio/high_max": 0.0027166251238668337, "clip_ratio/high_mean": 0.0012351689620118123, "clip_ratio/low_mean": 0.0011384154713596217, "clip_ratio/low_min": 0.00011234700559725752, "clip_ratio/region_mean": 0.002373584429733455, "epoch": 0.793002915451895, "grad_norm": 0.1324406862258911, "learning_rate": 5e-07, "loss": -0.0345, "step": 85 }, { "clip_ratio/high_max": 0.0021480369541677646, "clip_ratio/high_mean": 0.0010070661737699993, "clip_ratio/low_mean": 0.0011719924477802124, "clip_ratio/low_min": 7.173466019594343e-05, "clip_ratio/region_mean": 0.0021790586833958514, "epoch": 0.8023323615160349, "grad_norm": 0.13378696143627167, "learning_rate": 5e-07, "loss": 0.0081, "step": 86 }, { "clip_ratio/high_max": 0.0024519742801203392, "clip_ratio/high_mean": 0.001072947115972056, "clip_ratio/low_mean": 0.001031972515193047, "clip_ratio/low_min": 0.0001317659225605894, "clip_ratio/region_mean": 0.0021049195784144104, "epoch": 0.8116618075801749, "grad_norm": 0.11710911989212036, "learning_rate": 5e-07, "loss": 0.0102, "step": 87 }, { "clip_ratio/high_max": 0.0026126636694243643, "clip_ratio/high_mean": 0.0011445584605098702, "clip_ratio/low_mean": 0.0011490978504298255, "clip_ratio/low_min": 0.00020694040722446516, "clip_ratio/region_mean": 0.0022936563109396957, "epoch": 0.8209912536443149, "grad_norm": 0.12597015500068665, "learning_rate": 5e-07, "loss": -0.0101, "step": 88 }, { "clip_ratio/high_max": 0.0024568882217863575, "clip_ratio/high_mean": 0.00108201786497375, "clip_ratio/low_mean": 0.0010934615711448714, "clip_ratio/low_min": 7.273202390933875e-05, "clip_ratio/region_mean": 0.0021754793779109605, "epoch": 0.8303206997084548, "grad_norm": 0.1179308071732521, "learning_rate": 5e-07, "loss": 0.0093, "step": 89 }, { "clip_ratio/high_max": 0.0026153122817049734, "clip_ratio/high_mean": 0.00111842515252647, "clip_ratio/low_mean": 0.0011594263196457177, "clip_ratio/low_min": 0.00024540690901631024, "clip_ratio/region_mean": 0.0022778514903620817, "epoch": 0.8396501457725948, "grad_norm": 0.1327657699584961, "learning_rate": 5e-07, "loss": -0.0057, "step": 90 }, { "clip_ratio/high_max": 0.002534598985221237, "clip_ratio/high_mean": 0.001210538946907036, "clip_ratio/low_mean": 0.0009251924129785039, "clip_ratio/low_min": 5.44446183994296e-05, "clip_ratio/region_mean": 0.0021357313817134127, "epoch": 0.8489795918367347, "grad_norm": 0.13262243568897247, "learning_rate": 5e-07, "loss": 0.0188, "step": 91 }, { "clip_ratio/high_max": 0.0023722436744719744, "clip_ratio/high_mean": 0.0010358476793044247, "clip_ratio/low_mean": 0.0009467733179917559, "clip_ratio/low_min": 4.5292286813491955e-05, "clip_ratio/region_mean": 0.00198262096819235, "epoch": 0.8583090379008746, "grad_norm": 0.12303271889686584, "learning_rate": 5e-07, "loss": -0.0039, "step": 92 }, { "clip_ratio/high_max": 0.0022781071384088136, "clip_ratio/high_mean": 0.0011032367874577176, "clip_ratio/low_mean": 0.0010275329113937914, "clip_ratio/low_min": 0.00010772847872431157, "clip_ratio/region_mean": 0.0021307697243173607, "epoch": 0.8676384839650145, "grad_norm": 0.13420234620571136, "learning_rate": 5e-07, "loss": -0.0124, "step": 93 }, { "clip_ratio/high_max": 0.0021952271199552342, "clip_ratio/high_mean": 0.0010804641315189656, "clip_ratio/low_mean": 0.0010122978965227958, "clip_ratio/low_min": 0.00014320972059067572, "clip_ratio/region_mean": 0.002092761998937931, "epoch": 0.8769679300291545, "grad_norm": 0.12098018079996109, "learning_rate": 5e-07, "loss": -0.006, "step": 94 }, { "clip_ratio/high_max": 0.002461802345351316, "clip_ratio/high_mean": 0.0011353604095347691, "clip_ratio/low_mean": 0.0009493601428403053, "clip_ratio/low_min": 4.801800059794914e-05, "clip_ratio/region_mean": 0.002084720545099117, "epoch": 0.8862973760932945, "grad_norm": 0.12767820060253143, "learning_rate": 5e-07, "loss": -0.0059, "step": 95 }, { "clip_ratio/high_max": 0.0022527813125634566, "clip_ratio/high_mean": 0.001010920233966317, "clip_ratio/low_mean": 0.0010008105182350846, "clip_ratio/low_min": 3.285964885435533e-05, "clip_ratio/region_mean": 0.0020117307285545394, "epoch": 0.8956268221574344, "grad_norm": 0.11552922427654266, "learning_rate": 5e-07, "loss": -0.005, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014439174107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 609.28369140625, "completions/mean_terminated_length": 558.2008056640625, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 1.00932944606414, "grad_norm": 0.1428188681602478, "learning_rate": 5e-07, "loss": -0.0227, "num_tokens": 74498137.0, "reward": 0.53759765625, "reward_std": 0.22880253195762634, "rewards/simpleverify_reward/mean": 0.53759765625, "rewards/simpleverify_reward/std": 0.49859312176704407, "step": 97 }, { "clip_ratio/high_max": 0.002412014378933236, "clip_ratio/high_mean": 0.0010390685274614953, "clip_ratio/low_mean": 0.0006761948152416153, "clip_ratio/low_min": 4.6145758460625075e-05, "clip_ratio/region_mean": 0.0017152633299701847, "epoch": 1.01865889212828, "grad_norm": 0.13726329803466797, "learning_rate": 5e-07, "loss": -0.0202, "step": 98 }, { "clip_ratio/high_max": 0.0021888411574764177, "clip_ratio/high_mean": 0.0008821762439765735, "clip_ratio/low_mean": 0.0005867374338777154, "clip_ratio/low_min": 3.6346580600365996e-05, "clip_ratio/region_mean": 0.0014689137024106458, "epoch": 1.0279883381924197, "grad_norm": 0.12156879156827927, "learning_rate": 5e-07, "loss": 0.0338, "step": 99 }, { "clip_ratio/high_max": 0.002158078885258874, "clip_ratio/high_mean": 0.0009922259578161174, "clip_ratio/low_mean": 0.000607718357059639, "clip_ratio/low_min": 2.6730376703199e-05, "clip_ratio/region_mean": 0.0015999443203327246, "epoch": 1.0373177842565597, "grad_norm": 0.13005229830741882, "learning_rate": 5e-07, "loss": -0.0275, "step": 100 }, { "clip_ratio/high_max": 0.00208431938153808, "clip_ratio/high_mean": 0.0008170505880116252, "clip_ratio/low_mean": 0.0006534571894007968, "clip_ratio/low_min": 5.123859409650322e-05, "clip_ratio/region_mean": 0.0014705077701364644, "epoch": 1.0466472303206997, "grad_norm": 0.13222341239452362, "learning_rate": 5e-07, "loss": 0.0364, "step": 101 }, { "clip_ratio/high_max": 0.0020004238685942255, "clip_ratio/high_mean": 0.0009878917953756172, "clip_ratio/low_mean": 0.0007170062035584124, "clip_ratio/low_min": 2.3011782104731537e-05, "clip_ratio/region_mean": 0.001704898000753019, "epoch": 1.0559766763848397, "grad_norm": 0.1345507800579071, "learning_rate": 5e-07, "loss": 0.0029, "step": 102 }, { "clip_ratio/high_max": 0.0019352379349584226, "clip_ratio/high_mean": 0.0008718975823285291, "clip_ratio/low_mean": 0.0007541742616012925, "clip_ratio/low_min": 7.303286292881239e-05, "clip_ratio/region_mean": 0.0016260718330158852, "epoch": 1.0653061224489795, "grad_norm": 0.12978705763816833, "learning_rate": 5e-07, "loss": -0.0056, "step": 103 }, { "clip_ratio/high_max": 0.002352624145714799, "clip_ratio/high_mean": 0.0009090389867196791, "clip_ratio/low_mean": 0.0006674693813693011, "clip_ratio/low_min": 2.7713525014405604e-05, "clip_ratio/region_mean": 0.0015765083662699908, "epoch": 1.0746355685131195, "grad_norm": 0.12478694319725037, "learning_rate": 5e-07, "loss": 0.0028, "step": 104 }, { "clip_ratio/high_max": 0.0025181819510180503, "clip_ratio/high_mean": 0.0010329623437428381, "clip_ratio/low_mean": 0.0006875149847473949, "clip_ratio/low_min": 4.247387551004067e-05, "clip_ratio/region_mean": 0.0017204773394041695, "epoch": 1.0839650145772595, "grad_norm": 0.11838895827531815, "learning_rate": 5e-07, "loss": 0.0177, "step": 105 }, { "clip_ratio/high_max": 0.002307586451934185, "clip_ratio/high_mean": 0.0009979250862670597, "clip_ratio/low_mean": 0.0009068594572454458, "clip_ratio/low_min": 4.189024275547126e-05, "clip_ratio/region_mean": 0.0019047845271416008, "epoch": 1.0932944606413995, "grad_norm": 0.11559240520000458, "learning_rate": 5e-07, "loss": -0.0337, "step": 106 }, { "clip_ratio/high_max": 0.0022900708572706208, "clip_ratio/high_mean": 0.00104224169626832, "clip_ratio/low_mean": 0.0009164437888102839, "clip_ratio/low_min": 6.33470981483697e-05, "clip_ratio/region_mean": 0.001958685475983657, "epoch": 1.1026239067055394, "grad_norm": 0.1331067830324173, "learning_rate": 5e-07, "loss": 0.0244, "step": 107 }, { "clip_ratio/high_max": 0.00233249731536489, "clip_ratio/high_mean": 0.001049479662469821, "clip_ratio/low_mean": 0.0010218596289632842, "clip_ratio/low_min": 0.00011866063141496852, "clip_ratio/region_mean": 0.0020713393241749145, "epoch": 1.1119533527696792, "grad_norm": 0.12323485314846039, "learning_rate": 5e-07, "loss": 0.0088, "step": 108 }, { "clip_ratio/high_max": 0.0022582985839108005, "clip_ratio/high_mean": 0.0009559738355164882, "clip_ratio/low_mean": 0.0009132758223131532, "clip_ratio/low_min": 5.961625265626935e-05, "clip_ratio/region_mean": 0.0018692496523726732, "epoch": 1.1212827988338192, "grad_norm": 0.12629438936710358, "learning_rate": 5e-07, "loss": 0.005, "step": 109 }, { "clip_ratio/high_max": 0.0024561548270867206, "clip_ratio/high_mean": 0.0010402249463368207, "clip_ratio/low_mean": 0.0010177345302508911, "clip_ratio/low_min": 0.00012336498366494197, "clip_ratio/region_mean": 0.0020579594711307436, "epoch": 1.1306122448979592, "grad_norm": 0.12549607455730438, "learning_rate": 5e-07, "loss": -0.0056, "step": 110 }, { "clip_ratio/high_max": 0.0024932908709160984, "clip_ratio/high_mean": 0.001117931151384255, "clip_ratio/low_mean": 0.0008719716406631051, "clip_ratio/low_min": 4.1954988773795776e-05, "clip_ratio/region_mean": 0.0019899027756764553, "epoch": 1.1399416909620992, "grad_norm": 0.1251133680343628, "learning_rate": 5e-07, "loss": 0.0, "step": 111 }, { "clip_ratio/high_max": 0.0029504227059078403, "clip_ratio/high_mean": 0.0012048498683725484, "clip_ratio/low_mean": 0.001015726778859971, "clip_ratio/low_min": 0.00011289673057035543, "clip_ratio/region_mean": 0.002220576599938795, "epoch": 1.149271137026239, "grad_norm": 0.12310515344142914, "learning_rate": 5e-07, "loss": -0.0126, "step": 112 }, { "clip_ratio/high_max": 0.002522856098948978, "clip_ratio/high_mean": 0.0010406894507468678, "clip_ratio/low_mean": 0.0011670226340356749, "clip_ratio/low_min": 0.0001765086763043655, "clip_ratio/region_mean": 0.002207712095696479, "epoch": 1.158600583090379, "grad_norm": 0.1247100681066513, "learning_rate": 5e-07, "loss": 0.0235, "step": 113 }, { "clip_ratio/high_max": 0.0022356521149049513, "clip_ratio/high_mean": 0.0009427922923350707, "clip_ratio/low_mean": 0.0010149798272323096, "clip_ratio/low_min": 0.0001688823031145148, "clip_ratio/region_mean": 0.0019577721031964757, "epoch": 1.167930029154519, "grad_norm": 0.11946077644824982, "learning_rate": 5e-07, "loss": 0.03, "step": 114 }, { "clip_ratio/high_max": 0.0025783427045098506, "clip_ratio/high_mean": 0.0011456664979050402, "clip_ratio/low_mean": 0.0011362749828549568, "clip_ratio/low_min": 0.00016158123798959423, "clip_ratio/region_mean": 0.0022819414371042512, "epoch": 1.177259475218659, "grad_norm": 0.13808922469615936, "learning_rate": 5e-07, "loss": -0.0257, "step": 115 }, { "clip_ratio/high_max": 0.0020759690378326923, "clip_ratio/high_mean": 0.0010475745366420597, "clip_ratio/low_mean": 0.001021157280774787, "clip_ratio/low_min": 9.132805189437931e-05, "clip_ratio/region_mean": 0.002068731810140889, "epoch": 1.186588921282799, "grad_norm": 0.12475910782814026, "learning_rate": 5e-07, "loss": -0.0236, "step": 116 }, { "clip_ratio/high_max": 0.0022517901816172525, "clip_ratio/high_mean": 0.0009724043375172187, "clip_ratio/low_mean": 0.0013465041702147573, "clip_ratio/low_min": 0.0001758183971105609, "clip_ratio/region_mean": 0.0023189084458863363, "epoch": 1.1959183673469387, "grad_norm": 0.11097391694784164, "learning_rate": 5e-07, "loss": 0.0113, "step": 117 }, { "clip_ratio/high_max": 0.0020707121802843176, "clip_ratio/high_mean": 0.0009493553661741316, "clip_ratio/low_mean": 0.0010551097675488563, "clip_ratio/low_min": 6.672690506093204e-05, "clip_ratio/region_mean": 0.0020044650955242105, "epoch": 1.2052478134110787, "grad_norm": 0.11211978644132614, "learning_rate": 5e-07, "loss": 0.0072, "step": 118 }, { "clip_ratio/high_max": 0.00216411538713146, "clip_ratio/high_mean": 0.0009197963136102771, "clip_ratio/low_mean": 0.001240145316842245, "clip_ratio/low_min": 0.00022877372794027906, "clip_ratio/region_mean": 0.002159941621357575, "epoch": 1.2145772594752187, "grad_norm": 0.12565113604068756, "learning_rate": 5e-07, "loss": 0.0347, "step": 119 }, { "clip_ratio/high_max": 0.0022857633848616388, "clip_ratio/high_mean": 0.0010246565288980491, "clip_ratio/low_mean": 0.0011470295212347992, "clip_ratio/low_min": 0.0002409961971352459, "clip_ratio/region_mean": 0.0021716860646847636, "epoch": 1.2239067055393587, "grad_norm": 0.12010093778371811, "learning_rate": 5e-07, "loss": 0.0008, "step": 120 }, { "clip_ratio/high_max": 0.0022183424516697414, "clip_ratio/high_mean": 0.001045207794959424, "clip_ratio/low_mean": 0.0012124083768867422, "clip_ratio/low_min": 0.00010457054031576263, "clip_ratio/region_mean": 0.002257616135466378, "epoch": 1.2332361516034984, "grad_norm": 0.12658274173736572, "learning_rate": 5e-07, "loss": 0.0214, "step": 121 }, { "clip_ratio/high_max": 0.002132373207132332, "clip_ratio/high_mean": 0.0010345078881073277, "clip_ratio/low_mean": 0.0011389161081751809, "clip_ratio/low_min": 0.00017651768803261803, "clip_ratio/region_mean": 0.0021734240508521907, "epoch": 1.2425655976676384, "grad_norm": 0.14609114825725555, "learning_rate": 5e-07, "loss": 0.024, "step": 122 }, { "clip_ratio/high_max": 0.0023080587197910063, "clip_ratio/high_mean": 0.001030661256663734, "clip_ratio/low_mean": 0.0010234687379124807, "clip_ratio/low_min": 0.00011374608038750011, "clip_ratio/region_mean": 0.002054129996395204, "epoch": 1.2518950437317784, "grad_norm": 0.11758315563201904, "learning_rate": 5e-07, "loss": 0.0136, "step": 123 }, { "clip_ratio/high_max": 0.0020944466778018977, "clip_ratio/high_mean": 0.0009559125301166205, "clip_ratio/low_mean": 0.001189453796541784, "clip_ratio/low_min": 0.00016176121789612807, "clip_ratio/region_mean": 0.002145366306649521, "epoch": 1.2612244897959184, "grad_norm": 0.12284038215875626, "learning_rate": 5e-07, "loss": 0.0335, "step": 124 }, { "clip_ratio/high_max": 0.002425338032480795, "clip_ratio/high_mean": 0.0011915545328520238, "clip_ratio/low_mean": 0.0010088178478326881, "clip_ratio/low_min": 0.00015657668518542778, "clip_ratio/region_mean": 0.002200372407969553, "epoch": 1.2705539358600584, "grad_norm": 0.11723440140485764, "learning_rate": 5e-07, "loss": 0.001, "step": 125 }, { "clip_ratio/high_max": 0.0024598073214292526, "clip_ratio/high_mean": 0.0009697449040686479, "clip_ratio/low_mean": 0.0009691519098851131, "clip_ratio/low_min": 8.132989660225576e-05, "clip_ratio/region_mean": 0.001938896813953761, "epoch": 1.2798833819241984, "grad_norm": 0.12413505464792252, "learning_rate": 5e-07, "loss": -0.0383, "step": 126 }, { "clip_ratio/high_max": 0.0022972080842009746, "clip_ratio/high_mean": 0.0010105808905791491, "clip_ratio/low_mean": 0.0011250882835156517, "clip_ratio/low_min": 0.00011135663044115063, "clip_ratio/region_mean": 0.0021356692086555995, "epoch": 1.2892128279883381, "grad_norm": 0.11817125231027603, "learning_rate": 5e-07, "loss": 0.0221, "step": 127 }, { "clip_ratio/high_max": 0.0025600624721846543, "clip_ratio/high_mean": 0.0010133253927051555, "clip_ratio/low_mean": 0.0011239370687690098, "clip_ratio/low_min": 0.00016600637172814459, "clip_ratio/region_mean": 0.0021372624614741653, "epoch": 1.2985422740524781, "grad_norm": 0.13495278358459473, "learning_rate": 5e-07, "loss": 0.0375, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01611328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 626.5213623046875, "completions/mean_terminated_length": 569.7011108398438, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 1.3078717201166181, "grad_norm": 0.12188677489757538, "learning_rate": 5e-07, "loss": 0.0095, "num_tokens": 93451318.0, "reward": 0.5400390625, "reward_std": 0.22390761971473694, "rewards/simpleverify_reward/mean": 0.5400390625, "rewards/simpleverify_reward/std": 0.49840298295021057, "step": 129 }, { "clip_ratio/high_max": 0.002532487858843524, "clip_ratio/high_mean": 0.0011036241448891815, "clip_ratio/low_mean": 0.0005548655426537152, "clip_ratio/low_min": 2.2305495804175735e-05, "clip_ratio/region_mean": 0.0016584896729909815, "epoch": 1.3172011661807579, "grad_norm": 0.12728527188301086, "learning_rate": 5e-07, "loss": -0.0031, "step": 130 }, { "clip_ratio/high_max": 0.002569200405559968, "clip_ratio/high_mean": 0.0011086292997788405, "clip_ratio/low_mean": 0.0005344564742699731, "clip_ratio/low_min": 2.8116499379393645e-05, "clip_ratio/region_mean": 0.0016430858086096123, "epoch": 1.3265306122448979, "grad_norm": 0.1286688596010208, "learning_rate": 5e-07, "loss": -0.0112, "step": 131 }, { "clip_ratio/high_max": 0.002110801222443115, "clip_ratio/high_mean": 0.0009192116049234755, "clip_ratio/low_mean": 0.0005631530239043059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014823645979049616, "epoch": 1.3358600583090379, "grad_norm": 0.11437869817018509, "learning_rate": 5e-07, "loss": 0.0107, "step": 132 }, { "clip_ratio/high_max": 0.0023251551465364173, "clip_ratio/high_mean": 0.0009645958270994015, "clip_ratio/low_mean": 0.0006603642414120259, "clip_ratio/low_min": 1.3830493116984144e-05, "clip_ratio/region_mean": 0.0016249601103481837, "epoch": 1.3451895043731779, "grad_norm": 0.13562798500061035, "learning_rate": 5e-07, "loss": -0.0065, "step": 133 }, { "clip_ratio/high_max": 0.002163304030545987, "clip_ratio/high_mean": 0.0008661448064231081, "clip_ratio/low_mean": 0.0007303108377527678, "clip_ratio/low_min": 1.188212900160579e-05, "clip_ratio/region_mean": 0.001596455604158109, "epoch": 1.3545189504373178, "grad_norm": 0.1309930682182312, "learning_rate": 5e-07, "loss": 0.033, "step": 134 }, { "clip_ratio/high_max": 0.0024065828874881845, "clip_ratio/high_mean": 0.0009457363503315719, "clip_ratio/low_mean": 0.0006568528879142832, "clip_ratio/low_min": 3.3970902222790755e-05, "clip_ratio/region_mean": 0.001602589229150908, "epoch": 1.3638483965014578, "grad_norm": 0.12403014302253723, "learning_rate": 5e-07, "loss": 0.0139, "step": 135 }, { "clip_ratio/high_max": 0.0020852075540460646, "clip_ratio/high_mean": 0.0009127761259151157, "clip_ratio/low_mean": 0.000675465889798943, "clip_ratio/low_min": 0.00011143419214931782, "clip_ratio/region_mean": 0.0015882419756962918, "epoch": 1.3731778425655976, "grad_norm": 0.13935862481594086, "learning_rate": 5e-07, "loss": 0.0061, "step": 136 }, { "clip_ratio/high_max": 0.0026606382452882826, "clip_ratio/high_mean": 0.0010857477573154029, "clip_ratio/low_mean": 0.000763498741434887, "clip_ratio/low_min": 5.5945578424143605e-05, "clip_ratio/region_mean": 0.0018492464878363535, "epoch": 1.3825072886297376, "grad_norm": 0.1322217881679535, "learning_rate": 5e-07, "loss": -0.0182, "step": 137 }, { "clip_ratio/high_max": 0.001961110501724761, "clip_ratio/high_mean": 0.0008836971974233165, "clip_ratio/low_mean": 0.0007501952604798134, "clip_ratio/low_min": 3.5483149986248463e-05, "clip_ratio/region_mean": 0.0016338924542651512, "epoch": 1.3918367346938776, "grad_norm": 0.12374483793973923, "learning_rate": 5e-07, "loss": 0.0034, "step": 138 }, { "clip_ratio/high_max": 0.0022783313834224828, "clip_ratio/high_mean": 0.0009250023213098757, "clip_ratio/low_mean": 0.0007064866604196141, "clip_ratio/low_min": 8.234209144575289e-05, "clip_ratio/region_mean": 0.001631488987186458, "epoch": 1.4011661807580174, "grad_norm": 0.11950436979532242, "learning_rate": 5e-07, "loss": -0.01, "step": 139 }, { "clip_ratio/high_max": 0.0021020961758040357, "clip_ratio/high_mean": 0.0009205868009303231, "clip_ratio/low_mean": 0.001008117356832372, "clip_ratio/low_min": 0.00021597160775854718, "clip_ratio/region_mean": 0.0019287041432107799, "epoch": 1.4104956268221573, "grad_norm": 0.1250588297843933, "learning_rate": 5e-07, "loss": 0.0483, "step": 140 }, { "clip_ratio/high_max": 0.0022805386906838976, "clip_ratio/high_mean": 0.0009637278089940082, "clip_ratio/low_mean": 0.0008353185785381356, "clip_ratio/low_min": 0.00013504010712495074, "clip_ratio/region_mean": 0.001799046411179006, "epoch": 1.4198250728862973, "grad_norm": 0.11921362578868866, "learning_rate": 5e-07, "loss": -0.0238, "step": 141 }, { "clip_ratio/high_max": 0.0021624194414471276, "clip_ratio/high_mean": 0.0010853397670871345, "clip_ratio/low_mean": 0.0008373549098905642, "clip_ratio/low_min": 4.1179018808179535e-05, "clip_ratio/region_mean": 0.0019226946897106245, "epoch": 1.4291545189504373, "grad_norm": 0.11101339757442474, "learning_rate": 5e-07, "loss": -0.0166, "step": 142 }, { "clip_ratio/high_max": 0.002537464053602889, "clip_ratio/high_mean": 0.001059767562765046, "clip_ratio/low_mean": 0.000989727295745979, "clip_ratio/low_min": 0.00011314626226521796, "clip_ratio/region_mean": 0.002049494876700919, "epoch": 1.4384839650145773, "grad_norm": 0.13162517547607422, "learning_rate": 5e-07, "loss": 0.0344, "step": 143 }, { "clip_ratio/high_max": 0.0020182182925054803, "clip_ratio/high_mean": 0.0009622107318136841, "clip_ratio/low_mean": 0.0008913901001506019, "clip_ratio/low_min": 7.711326270509744e-05, "clip_ratio/region_mean": 0.0018536007992224768, "epoch": 1.4478134110787173, "grad_norm": 0.12178914994001389, "learning_rate": 5e-07, "loss": -0.0141, "step": 144 }, { "clip_ratio/high_max": 0.002082120096019935, "clip_ratio/high_mean": 0.0009789334944798611, "clip_ratio/low_mean": 0.0009756877843756229, "clip_ratio/low_min": 9.88371921266662e-05, "clip_ratio/region_mean": 0.001954621293407399, "epoch": 1.457142857142857, "grad_norm": 0.11724556982517242, "learning_rate": 5e-07, "loss": -0.015, "step": 145 }, { "clip_ratio/high_max": 0.002136902119673323, "clip_ratio/high_mean": 0.0009376755897392286, "clip_ratio/low_mean": 0.0010446493251947686, "clip_ratio/low_min": 0.0001265414357476402, "clip_ratio/region_mean": 0.001982324931304902, "epoch": 1.466472303206997, "grad_norm": 0.11640371382236481, "learning_rate": 5e-07, "loss": 0.0233, "step": 146 }, { "clip_ratio/high_max": 0.0019896279118256643, "clip_ratio/high_mean": 0.0010162624093936756, "clip_ratio/low_mean": 0.0010304217739758315, "clip_ratio/low_min": 0.0001446172509531607, "clip_ratio/region_mean": 0.0020466841706365813, "epoch": 1.475801749271137, "grad_norm": 0.20054496824741364, "learning_rate": 5e-07, "loss": -0.0191, "step": 147 }, { "clip_ratio/high_max": 0.002186839505156968, "clip_ratio/high_mean": 0.0009855791704467265, "clip_ratio/low_mean": 0.001048430407536216, "clip_ratio/low_min": 0.0001502010036347201, "clip_ratio/region_mean": 0.002034009543422144, "epoch": 1.485131195335277, "grad_norm": 0.11376728117465973, "learning_rate": 5e-07, "loss": 0.0146, "step": 148 }, { "clip_ratio/high_max": 0.002394461742369458, "clip_ratio/high_mean": 0.0010685073502827436, "clip_ratio/low_mean": 0.0010939896128547844, "clip_ratio/low_min": 0.0001430833699487266, "clip_ratio/region_mean": 0.002162496981327422, "epoch": 1.4944606413994168, "grad_norm": 0.12821941077709198, "learning_rate": 5e-07, "loss": -0.0191, "step": 149 }, { "clip_ratio/high_max": 0.002413906760921236, "clip_ratio/high_mean": 0.0009416844641236821, "clip_ratio/low_mean": 0.0010388289683760377, "clip_ratio/low_min": 0.00015204520695988322, "clip_ratio/region_mean": 0.0019805134143098257, "epoch": 1.5037900874635568, "grad_norm": 0.11314080655574799, "learning_rate": 5e-07, "loss": 0.0061, "step": 150 }, { "clip_ratio/high_max": 0.0022575389011763036, "clip_ratio/high_mean": 0.0010119196922460105, "clip_ratio/low_mean": 0.0010828322610905161, "clip_ratio/low_min": 3.578233008738607e-05, "clip_ratio/region_mean": 0.0020947520170011558, "epoch": 1.5131195335276968, "grad_norm": 0.10506724566221237, "learning_rate": 5e-07, "loss": -0.0089, "step": 151 }, { "clip_ratio/high_max": 0.002319701001397334, "clip_ratio/high_mean": 0.0010348069572501117, "clip_ratio/low_mean": 0.0012161816412117332, "clip_ratio/low_min": 0.00017424348516215105, "clip_ratio/region_mean": 0.002250988662126474, "epoch": 1.5224489795918368, "grad_norm": 0.12333559989929199, "learning_rate": 5e-07, "loss": 0.0077, "step": 152 }, { "clip_ratio/high_max": 0.0018854673398891464, "clip_ratio/high_mean": 0.0008790300144028151, "clip_ratio/low_mean": 0.0011067713203374296, "clip_ratio/low_min": 0.00017525150451547233, "clip_ratio/region_mean": 0.001985801303817425, "epoch": 1.5317784256559768, "grad_norm": 0.11541402339935303, "learning_rate": 5e-07, "loss": 0.0342, "step": 153 }, { "clip_ratio/high_max": 0.002062065490463283, "clip_ratio/high_mean": 0.0008848087672959082, "clip_ratio/low_mean": 0.0011026721840607934, "clip_ratio/low_min": 0.00011142879884573631, "clip_ratio/region_mean": 0.001987480944080744, "epoch": 1.5411078717201168, "grad_norm": 0.11278820782899857, "learning_rate": 5e-07, "loss": 0.0337, "step": 154 }, { "clip_ratio/high_max": 0.0021580464162980206, "clip_ratio/high_mean": 0.0008849371624819469, "clip_ratio/low_mean": 0.0010856948792934418, "clip_ratio/low_min": 2.0791749193449505e-05, "clip_ratio/region_mean": 0.001970632052689325, "epoch": 1.5504373177842565, "grad_norm": 0.12022992968559265, "learning_rate": 5e-07, "loss": -0.002, "step": 155 }, { "clip_ratio/high_max": 0.002086007909383625, "clip_ratio/high_mean": 0.0009156819942290895, "clip_ratio/low_mean": 0.0010876557244046126, "clip_ratio/low_min": 5.3162744734436274e-05, "clip_ratio/region_mean": 0.002003337736823596, "epoch": 1.5597667638483965, "grad_norm": 0.12132801115512848, "learning_rate": 5e-07, "loss": 0.0225, "step": 156 }, { "clip_ratio/high_max": 0.002115488488925621, "clip_ratio/high_mean": 0.0009265114786103368, "clip_ratio/low_mean": 0.0009065129015652929, "clip_ratio/low_min": 9.179687549476512e-05, "clip_ratio/region_mean": 0.0018330243474338204, "epoch": 1.5690962099125363, "grad_norm": 0.12165207415819168, "learning_rate": 5e-07, "loss": -0.0265, "step": 157 }, { "clip_ratio/high_max": 0.0020164075831416994, "clip_ratio/high_mean": 0.0008815044457151089, "clip_ratio/low_mean": 0.0010422992563690059, "clip_ratio/low_min": 0.00019148907722410513, "clip_ratio/region_mean": 0.0019238036693423055, "epoch": 1.5784256559766763, "grad_norm": 0.12352567911148071, "learning_rate": 5e-07, "loss": 0.015, "step": 158 }, { "clip_ratio/high_max": 0.0021855336235603318, "clip_ratio/high_mean": 0.0009903410045808414, "clip_ratio/low_mean": 0.0009823391847021412, "clip_ratio/low_min": 5.57340190425748e-05, "clip_ratio/region_mean": 0.0019726801983779296, "epoch": 1.5877551020408163, "grad_norm": 0.12179430574178696, "learning_rate": 5e-07, "loss": -0.0137, "step": 159 }, { "clip_ratio/high_max": 0.002193670574342832, "clip_ratio/high_mean": 0.0009044589387485757, "clip_ratio/low_mean": 0.0010512249282328412, "clip_ratio/low_min": 0.00018381403242528904, "clip_ratio/region_mean": 0.0019556838233256713, "epoch": 1.5970845481049563, "grad_norm": 0.12712442874908447, "learning_rate": 5e-07, "loss": 0.0427, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017194475446428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 632.1343994140625, "completions/mean_terminated_length": 571.5330200195312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.6064139941690962, "grad_norm": 0.13164669275283813, "learning_rate": 5e-07, "loss": -0.0406, "num_tokens": 112414731.0, "reward": 0.5465262532234192, "reward_std": 0.22409099340438843, "rewards/simpleverify_reward/mean": 0.5465262532234192, "rewards/simpleverify_reward/std": 0.4978393018245697, "step": 161 }, { "clip_ratio/high_max": 0.0021155449067009613, "clip_ratio/high_mean": 0.0009557815465086605, "clip_ratio/low_mean": 0.0005093838472021162, "clip_ratio/low_min": 3.6188685953675304e-05, "clip_ratio/region_mean": 0.0014651653873443138, "epoch": 1.6157434402332362, "grad_norm": 0.11821571737527847, "learning_rate": 5e-07, "loss": 0.0033, "step": 162 }, { "clip_ratio/high_max": 0.0022329404673655517, "clip_ratio/high_mean": 0.0009242719679605216, "clip_ratio/low_mean": 0.0006075669989513699, "clip_ratio/low_min": 1.0296540494891815e-05, "clip_ratio/region_mean": 0.001531838974187849, "epoch": 1.6250728862973762, "grad_norm": 0.12656114995479584, "learning_rate": 5e-07, "loss": -0.0003, "step": 163 }, { "clip_ratio/high_max": 0.0023432328598573804, "clip_ratio/high_mean": 0.0010028715078078676, "clip_ratio/low_mean": 0.0005758390034316108, "clip_ratio/low_min": 4.870684551860904e-05, "clip_ratio/region_mean": 0.0015787105148774572, "epoch": 1.634402332361516, "grad_norm": 0.1313450187444687, "learning_rate": 5e-07, "loss": -0.0222, "step": 164 }, { "clip_ratio/high_max": 0.002295366626640316, "clip_ratio/high_mean": 0.0009485942791798152, "clip_ratio/low_mean": 0.0005887915976927616, "clip_ratio/low_min": 6.951588420633925e-05, "clip_ratio/region_mean": 0.0015373858987004496, "epoch": 1.643731778425656, "grad_norm": 0.13072460889816284, "learning_rate": 5e-07, "loss": -0.0039, "step": 165 }, { "clip_ratio/high_max": 0.0022207050205906853, "clip_ratio/high_mean": 0.0010448156244819984, "clip_ratio/low_mean": 0.0005846115154781728, "clip_ratio/low_min": 1.3730228602071293e-05, "clip_ratio/region_mean": 0.0016294271408696659, "epoch": 1.6530612244897958, "grad_norm": 0.12400466203689575, "learning_rate": 5e-07, "loss": -0.0291, "step": 166 }, { "clip_ratio/high_max": 0.001972455058421474, "clip_ratio/high_mean": 0.000894544604307157, "clip_ratio/low_mean": 0.0006051632972230436, "clip_ratio/low_min": 6.353945445880527e-05, "clip_ratio/region_mean": 0.001499707930634031, "epoch": 1.6623906705539357, "grad_norm": 0.12252423912286758, "learning_rate": 5e-07, "loss": 0.0363, "step": 167 }, { "clip_ratio/high_max": 0.002044535973254824, "clip_ratio/high_mean": 0.0008333008318004431, "clip_ratio/low_mean": 0.0007062842159939464, "clip_ratio/low_min": 6.15112621744629e-05, "clip_ratio/region_mean": 0.0015395850641652942, "epoch": 1.6717201166180757, "grad_norm": 0.1125316396355629, "learning_rate": 5e-07, "loss": 0.0365, "step": 168 }, { "clip_ratio/high_max": 0.0021485978249984328, "clip_ratio/high_mean": 0.000931831606067135, "clip_ratio/low_mean": 0.0006716513071296504, "clip_ratio/low_min": 6.542694973177277e-05, "clip_ratio/region_mean": 0.0016034829313866794, "epoch": 1.6810495626822157, "grad_norm": 0.11953836679458618, "learning_rate": 5e-07, "loss": 0.016, "step": 169 }, { "clip_ratio/high_max": 0.0021844438706466462, "clip_ratio/high_mean": 0.001000118729280075, "clip_ratio/low_mean": 0.0006926498044776963, "clip_ratio/low_min": 2.8072991881344933e-05, "clip_ratio/region_mean": 0.001692768513748888, "epoch": 1.6903790087463557, "grad_norm": 0.12566804885864258, "learning_rate": 5e-07, "loss": -0.004, "step": 170 }, { "clip_ratio/high_max": 0.002026978188951034, "clip_ratio/high_mean": 0.0010106535883096512, "clip_ratio/low_mean": 0.0007393034820779576, "clip_ratio/low_min": 6.54607229080284e-05, "clip_ratio/region_mean": 0.0017499570531072095, "epoch": 1.6997084548104957, "grad_norm": 0.11672554910182953, "learning_rate": 5e-07, "loss": 0.0393, "step": 171 }, { "clip_ratio/high_max": 0.0022237565026443917, "clip_ratio/high_mean": 0.001044314378304989, "clip_ratio/low_mean": 0.0007149055800255155, "clip_ratio/low_min": 7.136967906262726e-05, "clip_ratio/region_mean": 0.0017592199728824198, "epoch": 1.7090379008746357, "grad_norm": 0.13103492558002472, "learning_rate": 5e-07, "loss": -0.007, "step": 172 }, { "clip_ratio/high_max": 0.0020723925554193556, "clip_ratio/high_mean": 0.0008898755859263474, "clip_ratio/low_mean": 0.000817124753666576, "clip_ratio/low_min": 9.031708395923488e-05, "clip_ratio/region_mean": 0.0017070003232220188, "epoch": 1.7183673469387755, "grad_norm": 0.12058312445878983, "learning_rate": 5e-07, "loss": -0.0184, "step": 173 }, { "clip_ratio/high_max": 0.002287717034050729, "clip_ratio/high_mean": 0.0009946338177542202, "clip_ratio/low_mean": 0.0009351614280603826, "clip_ratio/low_min": 0.00013694400422537, "clip_ratio/region_mean": 0.0019297952458146028, "epoch": 1.7276967930029155, "grad_norm": 0.12123372405767441, "learning_rate": 5e-07, "loss": 0.0115, "step": 174 }, { "clip_ratio/high_max": 0.0023684725529165007, "clip_ratio/high_mean": 0.0009593802751624025, "clip_ratio/low_mean": 0.0008536418336007046, "clip_ratio/low_min": 4.0952990275400225e-05, "clip_ratio/region_mean": 0.0018130220851162449, "epoch": 1.7370262390670554, "grad_norm": 0.11936067044734955, "learning_rate": 5e-07, "loss": 0.013, "step": 175 }, { "clip_ratio/high_max": 0.002620673600176815, "clip_ratio/high_mean": 0.0009418849767826032, "clip_ratio/low_mean": 0.0009188011881633429, "clip_ratio/low_min": 6.07050469625392e-05, "clip_ratio/region_mean": 0.0018606861704029143, "epoch": 1.7463556851311952, "grad_norm": 0.1250162273645401, "learning_rate": 5e-07, "loss": 0.0169, "step": 176 }, { "clip_ratio/high_max": 0.002387378001003526, "clip_ratio/high_mean": 0.0010187762309215032, "clip_ratio/low_mean": 0.0009778692983672954, "clip_ratio/low_min": 6.785435653000604e-05, "clip_ratio/region_mean": 0.0019966455074609257, "epoch": 1.7556851311953352, "grad_norm": 0.12887752056121826, "learning_rate": 5e-07, "loss": 0.0088, "step": 177 }, { "clip_ratio/high_max": 0.002199211281549651, "clip_ratio/high_mean": 0.0009228738235833589, "clip_ratio/low_mean": 0.0009483404428465292, "clip_ratio/low_min": 5.106076605443377e-05, "clip_ratio/region_mean": 0.001871214270067867, "epoch": 1.7650145772594752, "grad_norm": 0.12798890471458435, "learning_rate": 5e-07, "loss": -0.0112, "step": 178 }, { "clip_ratio/high_max": 0.0019266923009126913, "clip_ratio/high_mean": 0.0009169851946353447, "clip_ratio/low_mean": 0.0010008659282902954, "clip_ratio/low_min": 7.834922780602938e-05, "clip_ratio/region_mean": 0.0019178511211066507, "epoch": 1.7743440233236152, "grad_norm": 0.11853598803281784, "learning_rate": 5e-07, "loss": 0.0142, "step": 179 }, { "clip_ratio/high_max": 0.0026814728043973446, "clip_ratio/high_mean": 0.0011250865245528985, "clip_ratio/low_mean": 0.0009183116944768699, "clip_ratio/low_min": 7.348589952016482e-05, "clip_ratio/region_mean": 0.0020433982135728, "epoch": 1.7836734693877552, "grad_norm": 0.1303117275238037, "learning_rate": 5e-07, "loss": -0.0494, "step": 180 }, { "clip_ratio/high_max": 0.002200952440034598, "clip_ratio/high_mean": 0.0009869730165519286, "clip_ratio/low_mean": 0.0009989191421482246, "clip_ratio/low_min": 5.99357681494439e-05, "clip_ratio/region_mean": 0.0019858920786646195, "epoch": 1.7930029154518952, "grad_norm": 0.12202052772045135, "learning_rate": 5e-07, "loss": -0.0076, "step": 181 }, { "clip_ratio/high_max": 0.0019073101939284243, "clip_ratio/high_mean": 0.0008760555901972111, "clip_ratio/low_mean": 0.001173174285213463, "clip_ratio/low_min": 0.00018075468142342288, "clip_ratio/region_mean": 0.0020492298281169496, "epoch": 1.802332361516035, "grad_norm": 0.12133477628231049, "learning_rate": 5e-07, "loss": 0.0526, "step": 182 }, { "clip_ratio/high_max": 0.0020933677114953753, "clip_ratio/high_mean": 0.0009629364285501651, "clip_ratio/low_mean": 0.0010246749934594845, "clip_ratio/low_min": 0.000179915594344493, "clip_ratio/region_mean": 0.0019876114383805543, "epoch": 1.811661807580175, "grad_norm": 0.12155276536941528, "learning_rate": 5e-07, "loss": -0.0099, "step": 183 }, { "clip_ratio/high_max": 0.00239091640105471, "clip_ratio/high_mean": 0.0009812059015530394, "clip_ratio/low_mean": 0.0011000458034686744, "clip_ratio/low_min": 0.00012733397397823865, "clip_ratio/region_mean": 0.0020812516886508092, "epoch": 1.820991253644315, "grad_norm": 0.12019948661327362, "learning_rate": 5e-07, "loss": 0.0508, "step": 184 }, { "clip_ratio/high_max": 0.0019842361980408896, "clip_ratio/high_mean": 0.0008896537019609241, "clip_ratio/low_mean": 0.0010469920507603092, "clip_ratio/low_min": 0.0001126598153859959, "clip_ratio/region_mean": 0.0019366457490832545, "epoch": 1.8303206997084547, "grad_norm": 0.1159803569316864, "learning_rate": 5e-07, "loss": -0.0313, "step": 185 }, { "clip_ratio/high_max": 0.0028051144763594493, "clip_ratio/high_mean": 0.0011272086412645876, "clip_ratio/low_mean": 0.0010457790340296924, "clip_ratio/low_min": 0.0001264070788238314, "clip_ratio/region_mean": 0.0021729877043981105, "epoch": 1.8396501457725947, "grad_norm": 0.1263759583234787, "learning_rate": 5e-07, "loss": -0.0085, "step": 186 }, { "clip_ratio/high_max": 0.0022244712527026422, "clip_ratio/high_mean": 0.0009046845661941916, "clip_ratio/low_mean": 0.0011046832551073749, "clip_ratio/low_min": 4.6719686906726565e-05, "clip_ratio/region_mean": 0.0020093678613193333, "epoch": 1.8489795918367347, "grad_norm": 0.12488023936748505, "learning_rate": 5e-07, "loss": -0.0014, "step": 187 }, { "clip_ratio/high_max": 0.0025403893741895445, "clip_ratio/high_mean": 0.0010209342472080607, "clip_ratio/low_mean": 0.001199446049213293, "clip_ratio/low_min": 0.0001260528542843531, "clip_ratio/region_mean": 0.0022203803309821524, "epoch": 1.8583090379008746, "grad_norm": 0.11962290108203888, "learning_rate": 5e-07, "loss": 0.0311, "step": 188 }, { "clip_ratio/high_max": 0.0019120309916615952, "clip_ratio/high_mean": 0.0008163550355675397, "clip_ratio/low_mean": 0.0009936506703525083, "clip_ratio/low_min": 0.00016120051895995857, "clip_ratio/region_mean": 0.0018100056986440904, "epoch": 1.8676384839650146, "grad_norm": 0.1153150349855423, "learning_rate": 5e-07, "loss": 0.0446, "step": 189 }, { "clip_ratio/high_max": 0.0020212894305586815, "clip_ratio/high_mean": 0.000999592426524032, "clip_ratio/low_mean": 0.0010861339342227438, "clip_ratio/low_min": 0.00013697767462872434, "clip_ratio/region_mean": 0.0020857263589277864, "epoch": 1.8769679300291546, "grad_norm": 0.13115379214286804, "learning_rate": 5e-07, "loss": 0.019, "step": 190 }, { "clip_ratio/high_max": 0.0026047063947771676, "clip_ratio/high_mean": 0.001052342286129715, "clip_ratio/low_mean": 0.0011089621439168695, "clip_ratio/low_min": 9.95486589090433e-05, "clip_ratio/region_mean": 0.0021613044882542454, "epoch": 1.8862973760932946, "grad_norm": 0.12084170430898666, "learning_rate": 5e-07, "loss": 0.0012, "step": 191 }, { "clip_ratio/high_max": 0.002433049747196492, "clip_ratio/high_mean": 0.0010287633231200743, "clip_ratio/low_mean": 0.0011012652976205572, "clip_ratio/low_min": 0.00019247478667239193, "clip_ratio/region_mean": 0.002130028573446907, "epoch": 1.8956268221574344, "grad_norm": 0.1289254128932953, "learning_rate": 5e-07, "loss": -0.028, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0169154575892857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 627.1741333007812, "completions/mean_terminated_length": 567.4876708984375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.00932944606414, "grad_norm": 0.13775061070919037, "learning_rate": 5e-07, "loss": 0.0272, "num_tokens": 131258154.0, "reward": 0.5625, "reward_std": 0.20682105422019958, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49608704447746277, "step": 193 }, { "clip_ratio/high_max": 0.002229022364190314, "clip_ratio/high_mean": 0.0009863105115073267, "clip_ratio/low_mean": 0.0005693547727787518, "clip_ratio/low_min": 6.933332406333648e-05, "clip_ratio/region_mean": 0.0015556652906525414, "epoch": 2.01865889212828, "grad_norm": 0.11476285010576248, "learning_rate": 5e-07, "loss": -0.04, "step": 194 }, { "clip_ratio/high_max": 0.0021732711866206955, "clip_ratio/high_mean": 0.0008677530840941472, "clip_ratio/low_mean": 0.0006152136866148794, "clip_ratio/low_min": 2.911266710725613e-05, "clip_ratio/region_mean": 0.0014829668107267935, "epoch": 2.02798833819242, "grad_norm": 0.1231580451130867, "learning_rate": 5e-07, "loss": 0.028, "step": 195 }, { "clip_ratio/high_max": 0.0018424815752950963, "clip_ratio/high_mean": 0.0008145036626956426, "clip_ratio/low_mean": 0.0006493221135315252, "clip_ratio/low_min": 3.894728251907509e-05, "clip_ratio/region_mean": 0.0014638257598562632, "epoch": 2.03731778425656, "grad_norm": 0.12309110909700394, "learning_rate": 5e-07, "loss": 0.0502, "step": 196 }, { "clip_ratio/high_max": 0.0026809979608515278, "clip_ratio/high_mean": 0.0009863548948487733, "clip_ratio/low_mean": 0.0007274754061654676, "clip_ratio/low_min": 3.765253131859936e-05, "clip_ratio/region_mean": 0.0017138303082901984, "epoch": 2.0466472303206995, "grad_norm": 0.1191527396440506, "learning_rate": 5e-07, "loss": -0.0111, "step": 197 }, { "clip_ratio/high_max": 0.0017816316176322289, "clip_ratio/high_mean": 0.0009079005467356183, "clip_ratio/low_mean": 0.00048239075658784714, "clip_ratio/low_min": 2.6910023734671995e-05, "clip_ratio/region_mean": 0.0013902913415222429, "epoch": 2.0559766763848395, "grad_norm": 0.12625813484191895, "learning_rate": 5e-07, "loss": -0.0131, "step": 198 }, { "clip_ratio/high_max": 0.0022486945963464677, "clip_ratio/high_mean": 0.0008553372317692265, "clip_ratio/low_mean": 0.0007179094282037113, "clip_ratio/low_min": 5.971540031168843e-05, "clip_ratio/region_mean": 0.0015732466708868742, "epoch": 2.0653061224489795, "grad_norm": 0.13229277729988098, "learning_rate": 5e-07, "loss": 0.0432, "step": 199 }, { "clip_ratio/high_max": 0.002375375261181034, "clip_ratio/high_mean": 0.0009779949541552924, "clip_ratio/low_mean": 0.0005284197841319838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001506414781033527, "epoch": 2.0746355685131195, "grad_norm": 0.12849822640419006, "learning_rate": 5e-07, "loss": -0.0289, "step": 200 }, { "clip_ratio/high_max": 0.0019767363082792144, "clip_ratio/high_mean": 0.0008118527821352473, "clip_ratio/low_mean": 0.0006840115056547802, "clip_ratio/low_min": 4.534612708084751e-05, "clip_ratio/region_mean": 0.0014958642823330592, "epoch": 2.0839650145772595, "grad_norm": 0.12380823493003845, "learning_rate": 5e-07, "loss": 0.0101, "step": 201 }, { "clip_ratio/high_max": 0.00220859899854986, "clip_ratio/high_mean": 0.000870908101205714, "clip_ratio/low_mean": 0.0007129778587113833, "clip_ratio/low_min": 6.808996840845793e-05, "clip_ratio/region_mean": 0.0015838859690120444, "epoch": 2.0932944606413995, "grad_norm": 0.12166927009820938, "learning_rate": 5e-07, "loss": 0.0164, "step": 202 }, { "clip_ratio/high_max": 0.0020893482433166355, "clip_ratio/high_mean": 0.000807605681984569, "clip_ratio/low_mean": 0.0006847439290140755, "clip_ratio/low_min": 3.447315884841373e-05, "clip_ratio/region_mean": 0.001492349631007528, "epoch": 2.1026239067055394, "grad_norm": 0.10874878615140915, "learning_rate": 5e-07, "loss": -0.0114, "step": 203 }, { "clip_ratio/high_max": 0.0019901734849554487, "clip_ratio/high_mean": 0.000850892864036723, "clip_ratio/low_mean": 0.0006782453638152219, "clip_ratio/low_min": 1.6525647879461758e-05, "clip_ratio/region_mean": 0.0015291382442228496, "epoch": 2.1119533527696794, "grad_norm": 0.11096864193677902, "learning_rate": 5e-07, "loss": -0.0209, "step": 204 }, { "clip_ratio/high_max": 0.002087655822833767, "clip_ratio/high_mean": 0.0009059525582415517, "clip_ratio/low_mean": 0.0007051446573314024, "clip_ratio/low_min": 6.609262891288381e-05, "clip_ratio/region_mean": 0.0016110972137539648, "epoch": 2.1212827988338194, "grad_norm": 0.11903106421232224, "learning_rate": 5e-07, "loss": 0.0062, "step": 205 }, { "clip_ratio/high_max": 0.0023755195652483962, "clip_ratio/high_mean": 0.0009710347112559248, "clip_ratio/low_mean": 0.0008937190495998948, "clip_ratio/low_min": 0.00016638330362184206, "clip_ratio/region_mean": 0.0018647537435754202, "epoch": 2.130612244897959, "grad_norm": 0.12437862157821655, "learning_rate": 5e-07, "loss": 0.0177, "step": 206 }, { "clip_ratio/high_max": 0.0020000168988190126, "clip_ratio/high_mean": 0.0008507368384016445, "clip_ratio/low_mean": 0.0008329663432959933, "clip_ratio/low_min": 5.5204912314366084e-05, "clip_ratio/region_mean": 0.0016837031580507755, "epoch": 2.139941690962099, "grad_norm": 0.11857423931360245, "learning_rate": 5e-07, "loss": 0.0346, "step": 207 }, { "clip_ratio/high_max": 0.002171499239921104, "clip_ratio/high_mean": 0.0008554183441447094, "clip_ratio/low_mean": 0.0007344380210270174, "clip_ratio/low_min": 9.746144132805057e-05, "clip_ratio/region_mean": 0.0015898563724476844, "epoch": 2.149271137026239, "grad_norm": 0.10843094438314438, "learning_rate": 5e-07, "loss": -0.0217, "step": 208 }, { "clip_ratio/high_max": 0.0026064330886583775, "clip_ratio/high_mean": 0.0010858900168386754, "clip_ratio/low_mean": 0.0008511864434694871, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0019370764421182685, "epoch": 2.158600583090379, "grad_norm": 0.12493105232715607, "learning_rate": 5e-07, "loss": -0.0195, "step": 209 }, { "clip_ratio/high_max": 0.002184925753681455, "clip_ratio/high_mean": 0.0009724825613375288, "clip_ratio/low_mean": 0.0009151926715276204, "clip_ratio/low_min": 1.752172647684347e-05, "clip_ratio/region_mean": 0.0018876752510550432, "epoch": 2.167930029154519, "grad_norm": 0.12322812527418137, "learning_rate": 5e-07, "loss": 0.0212, "step": 210 }, { "clip_ratio/high_max": 0.0020377628461574204, "clip_ratio/high_mean": 0.0007814385971869342, "clip_ratio/low_mean": 0.0008634301702841185, "clip_ratio/low_min": 4.9621728066995274e-05, "clip_ratio/region_mean": 0.0016448687674710527, "epoch": 2.177259475218659, "grad_norm": 0.11850560456514359, "learning_rate": 5e-07, "loss": 0.0264, "step": 211 }, { "clip_ratio/high_max": 0.002252709797176067, "clip_ratio/high_mean": 0.0009210860789607977, "clip_ratio/low_mean": 0.0008786661619524239, "clip_ratio/low_min": 9.591357957106084e-05, "clip_ratio/region_mean": 0.0017997522227233276, "epoch": 2.186588921282799, "grad_norm": 0.13405467569828033, "learning_rate": 5e-07, "loss": 0.0059, "step": 212 }, { "clip_ratio/high_max": 0.002043398439127486, "clip_ratio/high_mean": 0.0008526231795258354, "clip_ratio/low_mean": 0.0009018258497235365, "clip_ratio/low_min": 4.1075159060710575e-05, "clip_ratio/region_mean": 0.0017544490183354355, "epoch": 2.195918367346939, "grad_norm": 0.11684154719114304, "learning_rate": 5e-07, "loss": -0.0182, "step": 213 }, { "clip_ratio/high_max": 0.001940479654876981, "clip_ratio/high_mean": 0.0008909875396057032, "clip_ratio/low_mean": 0.0007566111180494772, "clip_ratio/low_min": 5.765316109318519e-05, "clip_ratio/region_mean": 0.0016475986340083182, "epoch": 2.205247813411079, "grad_norm": 0.11965250223875046, "learning_rate": 5e-07, "loss": -0.0044, "step": 214 }, { "clip_ratio/high_max": 0.0021004133304813877, "clip_ratio/high_mean": 0.0007997016309673199, "clip_ratio/low_mean": 0.0010004325176851125, "clip_ratio/low_min": 0.00012600076297530904, "clip_ratio/region_mean": 0.001800134195946157, "epoch": 2.2145772594752184, "grad_norm": 0.11486749351024628, "learning_rate": 5e-07, "loss": 0.034, "step": 215 }, { "clip_ratio/high_max": 0.0023985433581401594, "clip_ratio/high_mean": 0.0008992287530418253, "clip_ratio/low_mean": 0.0009571204427629709, "clip_ratio/low_min": 0.00015878679187153466, "clip_ratio/region_mean": 0.0018563491612439975, "epoch": 2.2239067055393584, "grad_norm": 0.11359333992004395, "learning_rate": 5e-07, "loss": 0.0147, "step": 216 }, { "clip_ratio/high_max": 0.0022533879564434756, "clip_ratio/high_mean": 0.0009853052542894147, "clip_ratio/low_mean": 0.0008391646824748022, "clip_ratio/low_min": 1.2810001862817444e-05, "clip_ratio/region_mean": 0.0018244699531351216, "epoch": 2.2332361516034984, "grad_norm": 0.11455614119768143, "learning_rate": 5e-07, "loss": -0.0319, "step": 217 }, { "clip_ratio/high_max": 0.002313061304448638, "clip_ratio/high_mean": 0.0009640058342483826, "clip_ratio/low_mean": 0.0008587654356233543, "clip_ratio/low_min": 0.00010018447210313752, "clip_ratio/region_mean": 0.0018227712935185991, "epoch": 2.2425655976676384, "grad_norm": 0.11951214075088501, "learning_rate": 5e-07, "loss": 0.0009, "step": 218 }, { "clip_ratio/high_max": 0.002272227873618249, "clip_ratio/high_mean": 0.0010316048719687387, "clip_ratio/low_mean": 0.0009271391336369561, "clip_ratio/low_min": 8.370924479095265e-05, "clip_ratio/region_mean": 0.0019587440256145783, "epoch": 2.2518950437317784, "grad_norm": 0.11467453092336655, "learning_rate": 5e-07, "loss": -0.024, "step": 219 }, { "clip_ratio/high_max": 0.0022254415453062393, "clip_ratio/high_mean": 0.0008693968411535025, "clip_ratio/low_mean": 0.0009523302032903302, "clip_ratio/low_min": 3.0278517442638986e-05, "clip_ratio/region_mean": 0.001821727018977981, "epoch": 2.2612244897959184, "grad_norm": 0.12011631578207016, "learning_rate": 5e-07, "loss": 0.0244, "step": 220 }, { "clip_ratio/high_max": 0.0023950649410835467, "clip_ratio/high_mean": 0.001096279334888095, "clip_ratio/low_mean": 0.001002466802674462, "clip_ratio/low_min": 7.663967880944256e-05, "clip_ratio/region_mean": 0.0020987461612094194, "epoch": 2.2705539358600584, "grad_norm": 0.1340310424566269, "learning_rate": 5e-07, "loss": 0.0143, "step": 221 }, { "clip_ratio/high_max": 0.0019899350372725166, "clip_ratio/high_mean": 0.0008908587224141229, "clip_ratio/low_mean": 0.000956410180151579, "clip_ratio/low_min": 9.185300405079033e-05, "clip_ratio/region_mean": 0.0018472688825568184, "epoch": 2.2798833819241984, "grad_norm": 0.11284404993057251, "learning_rate": 5e-07, "loss": 0.0257, "step": 222 }, { "clip_ratio/high_max": 0.0021345042769098654, "clip_ratio/high_mean": 0.0009371515152452048, "clip_ratio/low_mean": 0.000967794065218186, "clip_ratio/low_min": 5.2023277021362446e-05, "clip_ratio/region_mean": 0.0019049456168431789, "epoch": 2.2892128279883384, "grad_norm": 0.12954267859458923, "learning_rate": 5e-07, "loss": 0.0186, "step": 223 }, { "clip_ratio/high_max": 0.0018915035252575763, "clip_ratio/high_mean": 0.0008314701708513894, "clip_ratio/low_mean": 0.0009672053129179403, "clip_ratio/low_min": 3.723203371919226e-05, "clip_ratio/region_mean": 0.0017986754792218562, "epoch": 2.298542274052478, "grad_norm": 0.13046307861804962, "learning_rate": 5e-07, "loss": 0.0034, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 628.1800537109375, "completions/mean_terminated_length": 569.1365966796875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.307871720116618, "grad_norm": 0.13134711980819702, "learning_rate": 5e-07, "loss": -0.0241, "num_tokens": 150179692.0, "reward": 0.5658482313156128, "reward_std": 0.20403511822223663, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.4956537187099457, "step": 225 }, { "clip_ratio/high_max": 0.0019468590217002202, "clip_ratio/high_mean": 0.0008631800246803323, "clip_ratio/low_mean": 0.0006505349283543183, "clip_ratio/low_min": 2.5730752895469777e-05, "clip_ratio/region_mean": 0.0015137149384827353, "epoch": 2.317201166180758, "grad_norm": 0.12197469919919968, "learning_rate": 5e-07, "loss": 0.0076, "step": 226 }, { "clip_ratio/high_max": 0.0021045406683697365, "clip_ratio/high_mean": 0.0008605964576418046, "clip_ratio/low_mean": 0.0005780401970696403, "clip_ratio/low_min": 1.4162701518216636e-05, "clip_ratio/region_mean": 0.0014386366274266038, "epoch": 2.326530612244898, "grad_norm": 0.1258629709482193, "learning_rate": 5e-07, "loss": -0.0049, "step": 227 }, { "clip_ratio/high_max": 0.0022578798671020195, "clip_ratio/high_mean": 0.0009214651472575497, "clip_ratio/low_mean": 0.0006046358830644749, "clip_ratio/low_min": 3.1508503525401466e-05, "clip_ratio/region_mean": 0.0015261010121321306, "epoch": 2.335860058309038, "grad_norm": 0.12187648564577103, "learning_rate": 5e-07, "loss": 0.0219, "step": 228 }, { "clip_ratio/high_max": 0.002285140650201356, "clip_ratio/high_mean": 0.0009679764207248809, "clip_ratio/low_mean": 0.0005572715126618277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015252479497576132, "epoch": 2.345189504373178, "grad_norm": 0.12375791370868683, "learning_rate": 5e-07, "loss": 0.0025, "step": 229 }, { "clip_ratio/high_max": 0.0020598897244781256, "clip_ratio/high_mean": 0.0008461790384899359, "clip_ratio/low_mean": 0.000638820672975271, "clip_ratio/low_min": 9.404152478964534e-06, "clip_ratio/region_mean": 0.0014849997241981328, "epoch": 2.354518950437318, "grad_norm": 0.113538958132267, "learning_rate": 5e-07, "loss": 0.0064, "step": 230 }, { "clip_ratio/high_max": 0.0019174331646354403, "clip_ratio/high_mean": 0.0007830447357264347, "clip_ratio/low_mean": 0.0006618806910410058, "clip_ratio/low_min": 3.487723370199092e-05, "clip_ratio/region_mean": 0.0014449254304054193, "epoch": 2.363848396501458, "grad_norm": 0.11257679015398026, "learning_rate": 5e-07, "loss": -0.0102, "step": 231 }, { "clip_ratio/high_max": 0.0022329769599309657, "clip_ratio/high_mean": 0.0008192208970285719, "clip_ratio/low_mean": 0.000667771970256581, "clip_ratio/low_min": 7.396097316814121e-05, "clip_ratio/region_mean": 0.0014869928818370681, "epoch": 2.373177842565598, "grad_norm": 0.11805589497089386, "learning_rate": 5e-07, "loss": 0.0229, "step": 232 }, { "clip_ratio/high_max": 0.0020047536891070195, "clip_ratio/high_mean": 0.0008233848657255294, "clip_ratio/low_mean": 0.0007809676062606741, "clip_ratio/low_min": 5.248362504062243e-05, "clip_ratio/region_mean": 0.0016043524956330657, "epoch": 2.3825072886297374, "grad_norm": 0.12454579025506973, "learning_rate": 5e-07, "loss": 0.0125, "step": 233 }, { "clip_ratio/high_max": 0.0021490436847670935, "clip_ratio/high_mean": 0.0009364865836687386, "clip_ratio/low_mean": 0.0007699130137552856, "clip_ratio/low_min": 2.7286867407383397e-05, "clip_ratio/region_mean": 0.0017063995837816037, "epoch": 2.3918367346938774, "grad_norm": 0.1119610071182251, "learning_rate": 5e-07, "loss": 0.0393, "step": 234 }, { "clip_ratio/high_max": 0.0019709931257239077, "clip_ratio/high_mean": 0.0008370563227799721, "clip_ratio/low_mean": 0.0007531626924901502, "clip_ratio/low_min": 2.2417482796299737e-05, "clip_ratio/region_mean": 0.0015902190025371965, "epoch": 2.4011661807580174, "grad_norm": 0.12001766264438629, "learning_rate": 5e-07, "loss": 0.001, "step": 235 }, { "clip_ratio/high_max": 0.0021226400494924746, "clip_ratio/high_mean": 0.0008200508164009079, "clip_ratio/low_mean": 0.0007281558482645778, "clip_ratio/low_min": 6.69851742713945e-05, "clip_ratio/region_mean": 0.0015482066955883056, "epoch": 2.4104956268221573, "grad_norm": 0.13007567822933197, "learning_rate": 5e-07, "loss": 0.0288, "step": 236 }, { "clip_ratio/high_max": 0.0020586479986377526, "clip_ratio/high_mean": 0.0009263794036087347, "clip_ratio/low_mean": 0.0007144017963582883, "clip_ratio/low_min": 2.7860384761879686e-05, "clip_ratio/region_mean": 0.0016407811745011713, "epoch": 2.4198250728862973, "grad_norm": 0.10453750938177109, "learning_rate": 5e-07, "loss": -0.0156, "step": 237 }, { "clip_ratio/high_max": 0.002203514421125874, "clip_ratio/high_mean": 0.0008849140649545006, "clip_ratio/low_mean": 0.0006674068590655224, "clip_ratio/low_min": 4.147702657064656e-05, "clip_ratio/region_mean": 0.001552320936752949, "epoch": 2.4291545189504373, "grad_norm": 0.11704640090465546, "learning_rate": 5e-07, "loss": -0.0383, "step": 238 }, { "clip_ratio/high_max": 0.0021351744362618774, "clip_ratio/high_mean": 0.0009351320732093882, "clip_ratio/low_mean": 0.0008461579745926429, "clip_ratio/low_min": 8.746576531848405e-05, "clip_ratio/region_mean": 0.001781290047802031, "epoch": 2.4384839650145773, "grad_norm": 0.11870754510164261, "learning_rate": 5e-07, "loss": -0.0035, "step": 239 }, { "clip_ratio/high_max": 0.002885163230530452, "clip_ratio/high_mean": 0.0010167193086090265, "clip_ratio/low_mean": 0.0008019546248760889, "clip_ratio/low_min": 7.625996659044176e-05, "clip_ratio/region_mean": 0.0018186739398515783, "epoch": 2.4478134110787173, "grad_norm": 0.11866199225187302, "learning_rate": 5e-07, "loss": -0.0445, "step": 240 }, { "clip_ratio/high_max": 0.002493797852366697, "clip_ratio/high_mean": 0.001027366302878363, "clip_ratio/low_mean": 0.00080925793008646, "clip_ratio/low_min": 4.693234404840041e-05, "clip_ratio/region_mean": 0.0018366242220508866, "epoch": 2.4571428571428573, "grad_norm": 0.11172059178352356, "learning_rate": 5e-07, "loss": -0.0107, "step": 241 }, { "clip_ratio/high_max": 0.0017664682454778813, "clip_ratio/high_mean": 0.0007959799822856439, "clip_ratio/low_mean": 0.0009423257724847645, "clip_ratio/low_min": 9.527554448141018e-05, "clip_ratio/region_mean": 0.001738305771141313, "epoch": 2.466472303206997, "grad_norm": 0.12680684030056, "learning_rate": 5e-07, "loss": 0.0068, "step": 242 }, { "clip_ratio/high_max": 0.0021455689711729065, "clip_ratio/high_mean": 0.0009353133245895151, "clip_ratio/low_mean": 0.0009823809632507619, "clip_ratio/low_min": 0.00011531837390066357, "clip_ratio/region_mean": 0.001917694287840277, "epoch": 2.4758017492711373, "grad_norm": 0.12407863885164261, "learning_rate": 5e-07, "loss": 0.0151, "step": 243 }, { "clip_ratio/high_max": 0.002429618783935439, "clip_ratio/high_mean": 0.0010251744024571963, "clip_ratio/low_mean": 0.0008339414871443296, "clip_ratio/low_min": 3.0332443202496506e-05, "clip_ratio/region_mean": 0.0018591158514027484, "epoch": 2.485131195335277, "grad_norm": 0.11387648433446884, "learning_rate": 5e-07, "loss": -0.0375, "step": 244 }, { "clip_ratio/high_max": 0.00216454279870959, "clip_ratio/high_mean": 0.0008796136025921442, "clip_ratio/low_mean": 0.0009154105282505043, "clip_ratio/low_min": 8.244283253588947e-05, "clip_ratio/region_mean": 0.001795024101738818, "epoch": 2.494460641399417, "grad_norm": 0.11466012895107269, "learning_rate": 5e-07, "loss": 0.0099, "step": 245 }, { "clip_ratio/high_max": 0.002159710529667791, "clip_ratio/high_mean": 0.0009003815948744887, "clip_ratio/low_mean": 0.0009448984073969768, "clip_ratio/low_min": 2.9854310923838057e-05, "clip_ratio/region_mean": 0.0018452800213708542, "epoch": 2.503790087463557, "grad_norm": 0.13637150824069977, "learning_rate": 5e-07, "loss": 0.0463, "step": 246 }, { "clip_ratio/high_max": 0.002098587734508328, "clip_ratio/high_mean": 0.0008763448968238663, "clip_ratio/low_mean": 0.0009092434702324681, "clip_ratio/low_min": 7.323962017835584e-05, "clip_ratio/region_mean": 0.0017855883343145251, "epoch": 2.513119533527697, "grad_norm": 0.12613531947135925, "learning_rate": 5e-07, "loss": -0.0075, "step": 247 }, { "clip_ratio/high_max": 0.0020531823647615965, "clip_ratio/high_mean": 0.000872458524099784, "clip_ratio/low_mean": 0.0009210473417624598, "clip_ratio/low_min": 0.00015417218037327984, "clip_ratio/region_mean": 0.0017935058422153816, "epoch": 2.522448979591837, "grad_norm": 0.12674494087696075, "learning_rate": 5e-07, "loss": 0.0188, "step": 248 }, { "clip_ratio/high_max": 0.0024033719855651725, "clip_ratio/high_mean": 0.0009285767282563029, "clip_ratio/low_mean": 0.0009532334006507881, "clip_ratio/low_min": 8.930877629609313e-05, "clip_ratio/region_mean": 0.0018818101816577837, "epoch": 2.5317784256559768, "grad_norm": 0.12458933144807816, "learning_rate": 5e-07, "loss": 0.0264, "step": 249 }, { "clip_ratio/high_max": 0.002016237674979493, "clip_ratio/high_mean": 0.0009370575326101971, "clip_ratio/low_mean": 0.0009300682722823694, "clip_ratio/low_min": 5.7667080909595825e-05, "clip_ratio/region_mean": 0.001867125800345093, "epoch": 2.5411078717201168, "grad_norm": 0.12151386588811874, "learning_rate": 5e-07, "loss": 0.0065, "step": 250 }, { "clip_ratio/high_max": 0.0022031385960872285, "clip_ratio/high_mean": 0.000998389565211255, "clip_ratio/low_mean": 0.0009807745063881157, "clip_ratio/low_min": 7.365113742707763e-05, "clip_ratio/region_mean": 0.001979164080694318, "epoch": 2.5504373177842563, "grad_norm": 0.12449519336223602, "learning_rate": 5e-07, "loss": -0.0168, "step": 251 }, { "clip_ratio/high_max": 0.0019359301149961539, "clip_ratio/high_mean": 0.0009006101499835495, "clip_ratio/low_mean": 0.0009596726158633828, "clip_ratio/low_min": 3.7994698686816264e-05, "clip_ratio/region_mean": 0.0018602827440190595, "epoch": 2.5597667638483967, "grad_norm": 0.12011729925870895, "learning_rate": 5e-07, "loss": 0.0179, "step": 252 }, { "clip_ratio/high_max": 0.0019786344273597933, "clip_ratio/high_mean": 0.0009102276799239917, "clip_ratio/low_mean": 0.0010415639226266649, "clip_ratio/low_min": 0.00012442541083146352, "clip_ratio/region_mean": 0.0019517916152835824, "epoch": 2.5690962099125363, "grad_norm": 0.12171616405248642, "learning_rate": 5e-07, "loss": 0.0478, "step": 253 }, { "clip_ratio/high_max": 0.002204625623562606, "clip_ratio/high_mean": 0.0009647503684391268, "clip_ratio/low_mean": 0.0010178078409808222, "clip_ratio/low_min": 0.00011245799214520957, "clip_ratio/region_mean": 0.0019825581985060126, "epoch": 2.5784256559766763, "grad_norm": 0.11924804002046585, "learning_rate": 5e-07, "loss": -0.0044, "step": 254 }, { "clip_ratio/high_max": 0.002087706798192812, "clip_ratio/high_mean": 0.0008882680631359108, "clip_ratio/low_mean": 0.0009431020516785793, "clip_ratio/low_min": 6.834862506366335e-05, "clip_ratio/region_mean": 0.0018313701075385325, "epoch": 2.5877551020408163, "grad_norm": 0.11737913638353348, "learning_rate": 5e-07, "loss": 0.0124, "step": 255 }, { "clip_ratio/high_max": 0.002065118394966703, "clip_ratio/high_mean": 0.0008659984614496352, "clip_ratio/low_mean": 0.001027020229230402, "clip_ratio/low_min": 9.269145539292367e-05, "clip_ratio/region_mean": 0.0018930186779471114, "epoch": 2.5970845481049563, "grad_norm": 0.11867991834878922, "learning_rate": 5e-07, "loss": 0.0207, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0166015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 628.7033081054688, "completions/mean_terminated_length": 570.1690063476562, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.6064139941690962, "grad_norm": 0.12945429980754852, "learning_rate": 5e-07, "loss": -0.0614, "num_tokens": 169080521.0, "reward": 0.5622209906578064, "reward_std": 0.20609794557094574, "rewards/simpleverify_reward/mean": 0.5622209906578064, "rewards/simpleverify_reward/std": 0.4961221218109131, "step": 257 }, { "clip_ratio/high_max": 0.0021116924181114882, "clip_ratio/high_mean": 0.0009661569856689312, "clip_ratio/low_mean": 0.0005576787743848399, "clip_ratio/low_min": 6.813508571212878e-05, "clip_ratio/region_mean": 0.0015238357809721492, "epoch": 2.6157434402332362, "grad_norm": 0.13068020343780518, "learning_rate": 5e-07, "loss": 0.0222, "step": 258 }, { "clip_ratio/high_max": 0.002199319926148746, "clip_ratio/high_mean": 0.0008925827205530368, "clip_ratio/low_mean": 0.0005837720709678251, "clip_ratio/low_min": 6.643380402238108e-05, "clip_ratio/region_mean": 0.0014763547806069255, "epoch": 2.6250728862973762, "grad_norm": 0.11842658370733261, "learning_rate": 5e-07, "loss": 0.0103, "step": 259 }, { "clip_ratio/high_max": 0.0018341596005484462, "clip_ratio/high_mean": 0.000920283575396752, "clip_ratio/low_mean": 0.0006871194582345197, "clip_ratio/low_min": 7.376930716418428e-05, "clip_ratio/region_mean": 0.0016074030427262187, "epoch": 2.6344023323615158, "grad_norm": 0.12123681604862213, "learning_rate": 5e-07, "loss": -0.002, "step": 260 }, { "clip_ratio/high_max": 0.002142856603313703, "clip_ratio/high_mean": 0.0008353476696356665, "clip_ratio/low_mean": 0.0005606644335784949, "clip_ratio/low_min": 2.8082626158720814e-05, "clip_ratio/region_mean": 0.0013960120995761827, "epoch": 2.643731778425656, "grad_norm": 0.11993051320314407, "learning_rate": 5e-07, "loss": 0.008, "step": 261 }, { "clip_ratio/high_max": 0.0018861395728890784, "clip_ratio/high_mean": 0.0007984005078469636, "clip_ratio/low_mean": 0.0006277632091951091, "clip_ratio/low_min": 2.5875317987811286e-05, "clip_ratio/region_mean": 0.0014261637261370197, "epoch": 2.6530612244897958, "grad_norm": 0.11381463706493378, "learning_rate": 5e-07, "loss": 0.0086, "step": 262 }, { "clip_ratio/high_max": 0.0019032063028134871, "clip_ratio/high_mean": 0.0007902567722339882, "clip_ratio/low_mean": 0.0006543948293256108, "clip_ratio/low_min": 0.00012276396046217997, "clip_ratio/region_mean": 0.0014446515815507155, "epoch": 2.6623906705539357, "grad_norm": 0.12698380649089813, "learning_rate": 5e-07, "loss": 0.0201, "step": 263 }, { "clip_ratio/high_max": 0.002125911214534426, "clip_ratio/high_mean": 0.0009353961067972705, "clip_ratio/low_mean": 0.000554675625494383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014900717214914039, "epoch": 2.6717201166180757, "grad_norm": 0.11230649054050446, "learning_rate": 5e-07, "loss": -0.0227, "step": 264 }, { "clip_ratio/high_max": 0.001924642754602246, "clip_ratio/high_mean": 0.0008210958385461709, "clip_ratio/low_mean": 0.0006169139760459075, "clip_ratio/low_min": 1.0884708899538964e-05, "clip_ratio/region_mean": 0.0014380098291439936, "epoch": 2.6810495626822157, "grad_norm": 0.11232473701238632, "learning_rate": 5e-07, "loss": 0.0077, "step": 265 }, { "clip_ratio/high_max": 0.001963945738680195, "clip_ratio/high_mean": 0.0009041257508215494, "clip_ratio/low_mean": 0.0005949569840595359, "clip_ratio/low_min": 3.4266461625520606e-05, "clip_ratio/region_mean": 0.0014990827476140112, "epoch": 2.6903790087463557, "grad_norm": 0.12415003031492233, "learning_rate": 5e-07, "loss": -0.007, "step": 266 }, { "clip_ratio/high_max": 0.0019739881245186552, "clip_ratio/high_mean": 0.000751881047108327, "clip_ratio/low_mean": 0.0006131837189968792, "clip_ratio/low_min": 3.237749115214683e-05, "clip_ratio/region_mean": 0.0013650647997565102, "epoch": 2.6997084548104957, "grad_norm": 0.1071053296327591, "learning_rate": 5e-07, "loss": 0.023, "step": 267 }, { "clip_ratio/high_max": 0.0021954697513137944, "clip_ratio/high_mean": 0.0009412277740921127, "clip_ratio/low_mean": 0.0005869284841537592, "clip_ratio/low_min": 1.0616612598823849e-05, "clip_ratio/region_mean": 0.0015281562227755785, "epoch": 2.7090379008746357, "grad_norm": 0.12439851462841034, "learning_rate": 5e-07, "loss": -0.0216, "step": 268 }, { "clip_ratio/high_max": 0.00211604981450364, "clip_ratio/high_mean": 0.0008528916914656293, "clip_ratio/low_mean": 0.0007248500878631603, "clip_ratio/low_min": 7.69624202803243e-05, "clip_ratio/region_mean": 0.0015777417866047472, "epoch": 2.7183673469387752, "grad_norm": 0.1197730228304863, "learning_rate": 5e-07, "loss": 0.0029, "step": 269 }, { "clip_ratio/high_max": 0.0020234759722370654, "clip_ratio/high_mean": 0.0007997913544386392, "clip_ratio/low_mean": 0.0007547600598627469, "clip_ratio/low_min": 5.677217905031284e-05, "clip_ratio/region_mean": 0.0015545513670076616, "epoch": 2.7276967930029157, "grad_norm": 0.12958848476409912, "learning_rate": 5e-07, "loss": 0.012, "step": 270 }, { "clip_ratio/high_max": 0.0021548961522057652, "clip_ratio/high_mean": 0.0009424549079994904, "clip_ratio/low_mean": 0.0006421890320780221, "clip_ratio/low_min": 2.8814019060519058e-05, "clip_ratio/region_mean": 0.0015846439346205443, "epoch": 2.7370262390670552, "grad_norm": 0.13123735785484314, "learning_rate": 5e-07, "loss": -0.0355, "step": 271 }, { "clip_ratio/high_max": 0.002155321286409162, "clip_ratio/high_mean": 0.0008753391266509425, "clip_ratio/low_mean": 0.0008214292993216077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001696768420515582, "epoch": 2.746355685131195, "grad_norm": 0.11694559454917908, "learning_rate": 5e-07, "loss": 0.0164, "step": 272 }, { "clip_ratio/high_max": 0.001943747112818528, "clip_ratio/high_mean": 0.0007658486319996882, "clip_ratio/low_mean": 0.0008908960826374823, "clip_ratio/low_min": 2.1906764231971465e-05, "clip_ratio/region_mean": 0.0016567447601119056, "epoch": 2.755685131195335, "grad_norm": 0.11748509109020233, "learning_rate": 5e-07, "loss": 0.0382, "step": 273 }, { "clip_ratio/high_max": 0.0027088678180007264, "clip_ratio/high_mean": 0.0010202166195085738, "clip_ratio/low_mean": 0.0009833215808612294, "clip_ratio/low_min": 0.00012682040323852561, "clip_ratio/region_mean": 0.002003538189455867, "epoch": 2.765014577259475, "grad_norm": 0.13397367298603058, "learning_rate": 5e-07, "loss": 0.0075, "step": 274 }, { "clip_ratio/high_max": 0.0019552414523786865, "clip_ratio/high_mean": 0.0008022522033570567, "clip_ratio/low_mean": 0.0007957420821185224, "clip_ratio/low_min": 0.00010018911598308478, "clip_ratio/region_mean": 0.001597994276380632, "epoch": 2.774344023323615, "grad_norm": 0.1218450516462326, "learning_rate": 5e-07, "loss": 0.0445, "step": 275 }, { "clip_ratio/high_max": 0.001994713424210204, "clip_ratio/high_mean": 0.0008153965609380975, "clip_ratio/low_mean": 0.000932067980102147, "clip_ratio/low_min": 8.145323681674199e-05, "clip_ratio/region_mean": 0.0017474645501351915, "epoch": 2.783673469387755, "grad_norm": 0.12842969596385956, "learning_rate": 5e-07, "loss": 0.0713, "step": 276 }, { "clip_ratio/high_max": 0.0020554786424327176, "clip_ratio/high_mean": 0.0009450267243664712, "clip_ratio/low_mean": 0.0008010900055523962, "clip_ratio/low_min": 6.634022156504216e-05, "clip_ratio/region_mean": 0.001746116755384719, "epoch": 2.793002915451895, "grad_norm": 0.1107087954878807, "learning_rate": 5e-07, "loss": -0.0005, "step": 277 }, { "clip_ratio/high_max": 0.001978798594791442, "clip_ratio/high_mean": 0.0008394950800720835, "clip_ratio/low_mean": 0.0009390484119649045, "clip_ratio/low_min": 0.00013724190102948342, "clip_ratio/region_mean": 0.0017785435156838503, "epoch": 2.8023323615160347, "grad_norm": 0.14282046258449554, "learning_rate": 5e-07, "loss": 0.0095, "step": 278 }, { "clip_ratio/high_max": 0.0019009570714842994, "clip_ratio/high_mean": 0.0008041056571528316, "clip_ratio/low_mean": 0.0009972992738767061, "clip_ratio/low_min": 5.992132173560094e-05, "clip_ratio/region_mean": 0.0018014049419434741, "epoch": 2.811661807580175, "grad_norm": 0.11873260140419006, "learning_rate": 5e-07, "loss": 0.0482, "step": 279 }, { "clip_ratio/high_max": 0.001763923530234024, "clip_ratio/high_mean": 0.000855367970871157, "clip_ratio/low_mean": 0.0009675383371359203, "clip_ratio/low_min": 9.527266047371086e-05, "clip_ratio/region_mean": 0.0018229062989121303, "epoch": 2.8209912536443147, "grad_norm": 0.12212757766246796, "learning_rate": 5e-07, "loss": 0.0301, "step": 280 }, { "clip_ratio/high_max": 0.00247984522866318, "clip_ratio/high_mean": 0.0009830775907175848, "clip_ratio/low_mean": 0.0009571606733516091, "clip_ratio/low_min": 4.076690674992278e-05, "clip_ratio/region_mean": 0.0019402383331907913, "epoch": 2.8303206997084547, "grad_norm": 0.13787741959095, "learning_rate": 5e-07, "loss": -0.025, "step": 281 }, { "clip_ratio/high_max": 0.002011908625718206, "clip_ratio/high_mean": 0.0007902748839114793, "clip_ratio/low_mean": 0.0010742945196398068, "clip_ratio/low_min": 5.466553830046905e-05, "clip_ratio/region_mean": 0.0018645694144652225, "epoch": 2.8396501457725947, "grad_norm": 0.11272086948156357, "learning_rate": 5e-07, "loss": 0.01, "step": 282 }, { "clip_ratio/high_max": 0.00209311640355736, "clip_ratio/high_mean": 0.0008656627651362214, "clip_ratio/low_mean": 0.0010540227885940112, "clip_ratio/low_min": 7.026556340861134e-05, "clip_ratio/region_mean": 0.0019196855064365081, "epoch": 2.8489795918367347, "grad_norm": 0.1348952203989029, "learning_rate": 5e-07, "loss": 0.0059, "step": 283 }, { "clip_ratio/high_max": 0.002319593302672729, "clip_ratio/high_mean": 0.0010081664604513207, "clip_ratio/low_mean": 0.0009834856064117048, "clip_ratio/low_min": 6.749444946763106e-05, "clip_ratio/region_mean": 0.0019916520614060573, "epoch": 2.8583090379008746, "grad_norm": 0.12729023396968842, "learning_rate": 5e-07, "loss": -0.0007, "step": 284 }, { "clip_ratio/high_max": 0.0021284574977471493, "clip_ratio/high_mean": 0.0008607679283159086, "clip_ratio/low_mean": 0.001003829695036984, "clip_ratio/low_min": 0.0001357672044832725, "clip_ratio/region_mean": 0.0018645976524567232, "epoch": 2.8676384839650146, "grad_norm": 0.120214082300663, "learning_rate": 5e-07, "loss": 0.0045, "step": 285 }, { "clip_ratio/high_max": 0.002114113300194731, "clip_ratio/high_mean": 0.0009031677564053098, "clip_ratio/low_mean": 0.0009032449270307552, "clip_ratio/low_min": 5.364720527722966e-05, "clip_ratio/region_mean": 0.001806412692531012, "epoch": 2.8769679300291546, "grad_norm": 0.11489567905664444, "learning_rate": 5e-07, "loss": -0.0322, "step": 286 }, { "clip_ratio/high_max": 0.0019336532059242018, "clip_ratio/high_mean": 0.0008539920600014739, "clip_ratio/low_mean": 0.0009489125441177748, "clip_ratio/low_min": 0.00010466121784702409, "clip_ratio/region_mean": 0.001802904596843291, "epoch": 2.8862973760932946, "grad_norm": 0.11608465760946274, "learning_rate": 5e-07, "loss": 0.0063, "step": 287 }, { "clip_ratio/high_max": 0.0022198269798536785, "clip_ratio/high_mean": 0.0008903400703275111, "clip_ratio/low_mean": 0.0009577496566635091, "clip_ratio/low_min": 8.819740196486237e-05, "clip_ratio/region_mean": 0.0018480897633708082, "epoch": 2.8956268221574346, "grad_norm": 0.11759108304977417, "learning_rate": 5e-07, "loss": 0.0256, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020228794642857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 649.066162109375, "completions/mean_terminated_length": 577.8992309570312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.00932944606414, "grad_norm": 0.12073925137519836, "learning_rate": 5e-07, "loss": -0.0215, "num_tokens": 188215073.0, "reward": 0.5591169595718384, "reward_std": 0.19755488634109497, "rewards/simpleverify_reward/mean": 0.5591168999671936, "rewards/simpleverify_reward/std": 0.49650153517723083, "step": 289 }, { "clip_ratio/high_max": 0.001932067796587944, "clip_ratio/high_mean": 0.0008511422747687902, "clip_ratio/low_mean": 0.0005463888055601274, "clip_ratio/low_min": 2.739668889262248e-05, "clip_ratio/region_mean": 0.0013975310721434653, "epoch": 3.01865889212828, "grad_norm": 0.12279549241065979, "learning_rate": 5e-07, "loss": -0.0212, "step": 290 }, { "clip_ratio/high_max": 0.0020968365934095345, "clip_ratio/high_mean": 0.0008978242913144641, "clip_ratio/low_mean": 0.0005666115011990769, "clip_ratio/low_min": 4.4258124944462907e-05, "clip_ratio/region_mean": 0.0014644358161604032, "epoch": 3.02798833819242, "grad_norm": 0.12412568181753159, "learning_rate": 5e-07, "loss": 0.0052, "step": 291 }, { "clip_ratio/high_max": 0.0020038593720528297, "clip_ratio/high_mean": 0.0008016972642508335, "clip_ratio/low_mean": 0.0005663975807692623, "clip_ratio/low_min": 5.558447082876228e-05, "clip_ratio/region_mean": 0.0013680948395631276, "epoch": 3.03731778425656, "grad_norm": 0.12160039693117142, "learning_rate": 5e-07, "loss": 0.0081, "step": 292 }, { "clip_ratio/high_max": 0.0021619191829813644, "clip_ratio/high_mean": 0.0009066690545296296, "clip_ratio/low_mean": 0.0005332508644642076, "clip_ratio/low_min": 2.4974107873276807e-05, "clip_ratio/region_mean": 0.0014399199062609114, "epoch": 3.0466472303206995, "grad_norm": 0.13146284222602844, "learning_rate": 5e-07, "loss": 0.0135, "step": 293 }, { "clip_ratio/high_max": 0.001982688278076239, "clip_ratio/high_mean": 0.0009043691152328392, "clip_ratio/low_mean": 0.0005985553480059025, "clip_ratio/low_min": 1.8757502402877435e-05, "clip_ratio/region_mean": 0.0015029244314064272, "epoch": 3.0559766763848395, "grad_norm": 0.1237325668334961, "learning_rate": 5e-07, "loss": -0.0014, "step": 294 }, { "clip_ratio/high_max": 0.0023722867481410503, "clip_ratio/high_mean": 0.0010558086214587092, "clip_ratio/low_mean": 0.000615902602476126, "clip_ratio/low_min": 7.720088524365565e-05, "clip_ratio/region_mean": 0.0016717112448532134, "epoch": 3.0653061224489795, "grad_norm": 0.13182184100151062, "learning_rate": 5e-07, "loss": -0.0171, "step": 295 }, { "clip_ratio/high_max": 0.0018451683681632858, "clip_ratio/high_mean": 0.0007675232664041687, "clip_ratio/low_mean": 0.000621090545791958, "clip_ratio/low_min": 1.2939958651259076e-05, "clip_ratio/region_mean": 0.0013886137821828015, "epoch": 3.0746355685131195, "grad_norm": 0.11502837389707565, "learning_rate": 5e-07, "loss": 0.0031, "step": 296 }, { "clip_ratio/high_max": 0.002070607504720101, "clip_ratio/high_mean": 0.0009246872505173087, "clip_ratio/low_mean": 0.0005640955059789121, "clip_ratio/low_min": 4.8028396122390404e-05, "clip_ratio/region_mean": 0.0014887827273923904, "epoch": 3.0839650145772595, "grad_norm": 0.1119852289557457, "learning_rate": 5e-07, "loss": -0.0142, "step": 297 }, { "clip_ratio/high_max": 0.0020157887411187403, "clip_ratio/high_mean": 0.0008810577310214285, "clip_ratio/low_mean": 0.0006870632187201409, "clip_ratio/low_min": 0.00010049290904134978, "clip_ratio/region_mean": 0.0015681209624744952, "epoch": 3.0932944606413995, "grad_norm": 0.12266629189252853, "learning_rate": 5e-07, "loss": 0.0199, "step": 298 }, { "clip_ratio/high_max": 0.0018538599579187576, "clip_ratio/high_mean": 0.0007545039024989819, "clip_ratio/low_mean": 0.0006923759574419819, "clip_ratio/low_min": 2.405233863100875e-05, "clip_ratio/region_mean": 0.0014468798326561227, "epoch": 3.1026239067055394, "grad_norm": 0.11839115619659424, "learning_rate": 5e-07, "loss": 0.0524, "step": 299 }, { "clip_ratio/high_max": 0.0018061956579913385, "clip_ratio/high_mean": 0.0007741592253296403, "clip_ratio/low_mean": 0.000718145553037175, "clip_ratio/low_min": 3.803555500780931e-05, "clip_ratio/region_mean": 0.0014923048074706458, "epoch": 3.1119533527696794, "grad_norm": 0.12259431183338165, "learning_rate": 5e-07, "loss": 0.0328, "step": 300 }, { "clip_ratio/high_max": 0.001965731538803084, "clip_ratio/high_mean": 0.0008957185309554916, "clip_ratio/low_mean": 0.0006818132042099023, "clip_ratio/low_min": 5.6229790970974136e-05, "clip_ratio/region_mean": 0.0015775317660882138, "epoch": 3.1212827988338194, "grad_norm": 0.12893447279930115, "learning_rate": 5e-07, "loss": 0.0271, "step": 301 }, { "clip_ratio/high_max": 0.002465957157255616, "clip_ratio/high_mean": 0.0009220466236001812, "clip_ratio/low_mean": 0.0006877102077851305, "clip_ratio/low_min": 7.440114495693706e-05, "clip_ratio/region_mean": 0.0016097568368422799, "epoch": 3.130612244897959, "grad_norm": 0.11985207349061966, "learning_rate": 5e-07, "loss": 0.0098, "step": 302 }, { "clip_ratio/high_max": 0.001907071618916234, "clip_ratio/high_mean": 0.0008071527372521814, "clip_ratio/low_mean": 0.0007637609478479135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015709136860095896, "epoch": 3.139941690962099, "grad_norm": 0.1271803230047226, "learning_rate": 5e-07, "loss": 0.0022, "step": 303 }, { "clip_ratio/high_max": 0.0020417480627656914, "clip_ratio/high_mean": 0.0008123940424411558, "clip_ratio/low_mean": 0.0007366665504378034, "clip_ratio/low_min": 3.1944800866767764e-05, "clip_ratio/region_mean": 0.0015490605910599697, "epoch": 3.149271137026239, "grad_norm": 0.11633644253015518, "learning_rate": 5e-07, "loss": -0.0143, "step": 304 }, { "clip_ratio/high_max": 0.0019052798888878897, "clip_ratio/high_mean": 0.000861658150824951, "clip_ratio/low_mean": 0.0006333671808533836, "clip_ratio/low_min": 3.4006779969786294e-05, "clip_ratio/region_mean": 0.0014950253498682287, "epoch": 3.158600583090379, "grad_norm": 0.11174671351909637, "learning_rate": 5e-07, "loss": -0.0312, "step": 305 }, { "clip_ratio/high_max": 0.002029651470365934, "clip_ratio/high_mean": 0.0007755979931971524, "clip_ratio/low_mean": 0.0007516840978496475, "clip_ratio/low_min": 7.103493771865033e-05, "clip_ratio/region_mean": 0.0015272820746758953, "epoch": 3.167930029154519, "grad_norm": 0.11292817443609238, "learning_rate": 5e-07, "loss": 0.0092, "step": 306 }, { "clip_ratio/high_max": 0.002041622166871093, "clip_ratio/high_mean": 0.000819533268440864, "clip_ratio/low_mean": 0.0007376010325970128, "clip_ratio/low_min": 6.587555071746465e-05, "clip_ratio/region_mean": 0.0015571343028568663, "epoch": 3.177259475218659, "grad_norm": 0.11563138663768768, "learning_rate": 5e-07, "loss": -0.0159, "step": 307 }, { "clip_ratio/high_max": 0.0023002442067081574, "clip_ratio/high_mean": 0.0009782935339899268, "clip_ratio/low_mean": 0.0009936926944646984, "clip_ratio/low_min": 4.243074181431439e-05, "clip_ratio/region_mean": 0.0019719862757483497, "epoch": 3.186588921282799, "grad_norm": 0.12208119034767151, "learning_rate": 5e-07, "loss": 0.0177, "step": 308 }, { "clip_ratio/high_max": 0.0020598779592546634, "clip_ratio/high_mean": 0.0008475941613141913, "clip_ratio/low_mean": 0.0008348036917595891, "clip_ratio/low_min": 8.5282044892665e-05, "clip_ratio/region_mean": 0.0016823978585307486, "epoch": 3.195918367346939, "grad_norm": 0.11807536333799362, "learning_rate": 5e-07, "loss": 0.0024, "step": 309 }, { "clip_ratio/high_max": 0.002026563452091068, "clip_ratio/high_mean": 0.0007712802889727755, "clip_ratio/low_mean": 0.0007655859881197102, "clip_ratio/low_min": 2.0004954421892762e-05, "clip_ratio/region_mean": 0.0015368662934633903, "epoch": 3.205247813411079, "grad_norm": 0.12426210194826126, "learning_rate": 5e-07, "loss": -0.0029, "step": 310 }, { "clip_ratio/high_max": 0.0023614576130057685, "clip_ratio/high_mean": 0.000997989161987789, "clip_ratio/low_mean": 0.0008062829692789819, "clip_ratio/low_min": 2.477700763847679e-05, "clip_ratio/region_mean": 0.001804272127628792, "epoch": 3.2145772594752184, "grad_norm": 0.1274069994688034, "learning_rate": 5e-07, "loss": -0.0275, "step": 311 }, { "clip_ratio/high_max": 0.0017960377263079863, "clip_ratio/high_mean": 0.0006821596170993871, "clip_ratio/low_mean": 0.0010744931205408648, "clip_ratio/low_min": 0.00018415184513287386, "clip_ratio/region_mean": 0.0017566527603776194, "epoch": 3.2239067055393584, "grad_norm": 0.1300588995218277, "learning_rate": 5e-07, "loss": 0.0517, "step": 312 }, { "clip_ratio/high_max": 0.0021511353697860613, "clip_ratio/high_mean": 0.0009253841344616376, "clip_ratio/low_mean": 0.0008788641225692118, "clip_ratio/low_min": 0.00012659685489779804, "clip_ratio/region_mean": 0.0018042482843156904, "epoch": 3.2332361516034984, "grad_norm": 0.12333934009075165, "learning_rate": 5e-07, "loss": -0.0057, "step": 313 }, { "clip_ratio/high_max": 0.002072837633022573, "clip_ratio/high_mean": 0.0008536732784705237, "clip_ratio/low_mean": 0.0009339607659057947, "clip_ratio/low_min": 2.905053406720981e-05, "clip_ratio/region_mean": 0.0017876340716611594, "epoch": 3.2425655976676384, "grad_norm": 0.11493325233459473, "learning_rate": 5e-07, "loss": 0.0195, "step": 314 }, { "clip_ratio/high_max": 0.0024129688172251917, "clip_ratio/high_mean": 0.0009593321592546999, "clip_ratio/low_mean": 0.0007966058019519551, "clip_ratio/low_min": 3.571251272660447e-05, "clip_ratio/region_mean": 0.0017559379557496868, "epoch": 3.2518950437317784, "grad_norm": 0.11924290657043457, "learning_rate": 5e-07, "loss": -0.0102, "step": 315 }, { "clip_ratio/high_max": 0.0019467229503788985, "clip_ratio/high_mean": 0.0008992745169962291, "clip_ratio/low_mean": 0.0010320278197468724, "clip_ratio/low_min": 3.3676209568511695e-05, "clip_ratio/region_mean": 0.0019313022930873558, "epoch": 3.2612244897959184, "grad_norm": 0.11770806461572647, "learning_rate": 5e-07, "loss": 0.0102, "step": 316 }, { "clip_ratio/high_max": 0.0021296979030012153, "clip_ratio/high_mean": 0.0009136885128100403, "clip_ratio/low_mean": 0.0008844937747198856, "clip_ratio/low_min": 5.695749587175669e-05, "clip_ratio/region_mean": 0.0017981822820729576, "epoch": 3.2705539358600584, "grad_norm": 0.12060500681400299, "learning_rate": 5e-07, "loss": -0.0332, "step": 317 }, { "clip_ratio/high_max": 0.0022328115155687556, "clip_ratio/high_mean": 0.0009959243470802903, "clip_ratio/low_mean": 0.0007299170629266882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017258413790841587, "epoch": 3.2798833819241984, "grad_norm": 0.11766491085290909, "learning_rate": 5e-07, "loss": -0.0434, "step": 318 }, { "clip_ratio/high_max": 0.0023133463255362585, "clip_ratio/high_mean": 0.0008371060939680319, "clip_ratio/low_mean": 0.0009252078798454022, "clip_ratio/low_min": 7.446560175594641e-05, "clip_ratio/region_mean": 0.00176231398654636, "epoch": 3.2892128279883384, "grad_norm": 0.11903949081897736, "learning_rate": 5e-07, "loss": 0.0334, "step": 319 }, { "clip_ratio/high_max": 0.0019022341039089952, "clip_ratio/high_mean": 0.0008527290210622596, "clip_ratio/low_mean": 0.000857175313285552, "clip_ratio/low_min": 2.6445952244102955e-05, "clip_ratio/region_mean": 0.0017099043470807374, "epoch": 3.298542274052478, "grad_norm": 0.11972557008266449, "learning_rate": 5e-07, "loss": -0.0085, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017229352678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 625.10107421875, "completions/mean_terminated_length": 564.2513427734375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.307871720116618, "grad_norm": 0.13760283589363098, "learning_rate": 5e-07, "loss": 0.0775, "num_tokens": 207037659.0, "reward": 0.5703474283218384, "reward_std": 0.19944807887077332, "rewards/simpleverify_reward/mean": 0.5703473687171936, "rewards/simpleverify_reward/std": 0.4950351417064667, "step": 321 }, { "clip_ratio/high_max": 0.002235098792880308, "clip_ratio/high_mean": 0.000908821340999566, "clip_ratio/low_mean": 0.0006951405648578657, "clip_ratio/low_min": 3.419030508666765e-05, "clip_ratio/region_mean": 0.0016039619004004635, "epoch": 3.317201166180758, "grad_norm": 0.12550115585327148, "learning_rate": 5e-07, "loss": 0.004, "step": 322 }, { "clip_ratio/high_max": 0.002099421475577401, "clip_ratio/high_mean": 0.0008461400066153146, "clip_ratio/low_mean": 0.0005802680570923258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001426408052793704, "epoch": 3.326530612244898, "grad_norm": 0.12807370722293854, "learning_rate": 5e-07, "loss": -0.0298, "step": 323 }, { "clip_ratio/high_max": 0.002189527978771366, "clip_ratio/high_mean": 0.0008832019593683071, "clip_ratio/low_mean": 0.0005212271230448096, "clip_ratio/low_min": 3.070422462769784e-05, "clip_ratio/region_mean": 0.0014044290546735283, "epoch": 3.335860058309038, "grad_norm": 0.12696242332458496, "learning_rate": 5e-07, "loss": -0.0299, "step": 324 }, { "clip_ratio/high_max": 0.0019018910388695076, "clip_ratio/high_mean": 0.0008249703205365222, "clip_ratio/low_mean": 0.0006513137132060365, "clip_ratio/low_min": 3.2380027732870076e-05, "clip_ratio/region_mean": 0.0014762839782633819, "epoch": 3.345189504373178, "grad_norm": 0.12009655684232712, "learning_rate": 5e-07, "loss": 0.0043, "step": 325 }, { "clip_ratio/high_max": 0.0018215431518910918, "clip_ratio/high_mean": 0.0007667883674002951, "clip_ratio/low_mean": 0.0005547529053728795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013215412436693441, "epoch": 3.354518950437318, "grad_norm": 0.12460321187973022, "learning_rate": 5e-07, "loss": 0.0242, "step": 326 }, { "clip_ratio/high_max": 0.002206951925472822, "clip_ratio/high_mean": 0.0009058251353053492, "clip_ratio/low_mean": 0.0006163498292153236, "clip_ratio/low_min": 4.268920110916952e-05, "clip_ratio/region_mean": 0.001522174985439051, "epoch": 3.363848396501458, "grad_norm": 0.14015936851501465, "learning_rate": 5e-07, "loss": -0.0337, "step": 327 }, { "clip_ratio/high_max": 0.0017509169119875878, "clip_ratio/high_mean": 0.0007605823084304575, "clip_ratio/low_mean": 0.000670775851176586, "clip_ratio/low_min": 2.772037623799406e-05, "clip_ratio/region_mean": 0.0014313581632450223, "epoch": 3.373177842565598, "grad_norm": 0.1257377564907074, "learning_rate": 5e-07, "loss": 0.0349, "step": 328 }, { "clip_ratio/high_max": 0.0019771703955484554, "clip_ratio/high_mean": 0.0008649628107377794, "clip_ratio/low_mean": 0.0006741419820173178, "clip_ratio/low_min": 6.602419671253301e-05, "clip_ratio/region_mean": 0.0015391047927550972, "epoch": 3.3825072886297374, "grad_norm": 0.11474918574094772, "learning_rate": 5e-07, "loss": -0.0011, "step": 329 }, { "clip_ratio/high_max": 0.0019547446463548113, "clip_ratio/high_mean": 0.0008085059398581507, "clip_ratio/low_mean": 0.0006551918777404353, "clip_ratio/low_min": 6.234509601199534e-05, "clip_ratio/region_mean": 0.0014636978121416178, "epoch": 3.3918367346938774, "grad_norm": 0.13101685047149658, "learning_rate": 5e-07, "loss": 0.0006, "step": 330 }, { "clip_ratio/high_max": 0.002300233711139299, "clip_ratio/high_mean": 0.0008982824292615987, "clip_ratio/low_mean": 0.0006600591441383585, "clip_ratio/low_min": 8.737115149415331e-05, "clip_ratio/region_mean": 0.0015583415370201692, "epoch": 3.4011661807580174, "grad_norm": 0.12134057283401489, "learning_rate": 5e-07, "loss": -0.0114, "step": 331 }, { "clip_ratio/high_max": 0.0016890289698494598, "clip_ratio/high_mean": 0.0007693386560276849, "clip_ratio/low_mean": 0.0007701471831751405, "clip_ratio/low_min": 3.348927430124604e-05, "clip_ratio/region_mean": 0.001539485834655352, "epoch": 3.4104956268221573, "grad_norm": 0.13108669221401215, "learning_rate": 5e-07, "loss": 0.0187, "step": 332 }, { "clip_ratio/high_max": 0.0019605685338319745, "clip_ratio/high_mean": 0.0008122507515508914, "clip_ratio/low_mean": 0.0008035358150664251, "clip_ratio/low_min": 0.00011070189157180721, "clip_ratio/region_mean": 0.001615786532056518, "epoch": 3.4198250728862973, "grad_norm": 0.11989527940750122, "learning_rate": 5e-07, "loss": 0.0409, "step": 333 }, { "clip_ratio/high_max": 0.0019789480793406256, "clip_ratio/high_mean": 0.0008473269008391071, "clip_ratio/low_mean": 0.0007316424071177607, "clip_ratio/low_min": 6.403781844710466e-05, "clip_ratio/region_mean": 0.0015789692915859632, "epoch": 3.4291545189504373, "grad_norm": 0.12012314796447754, "learning_rate": 5e-07, "loss": -0.0115, "step": 334 }, { "clip_ratio/high_max": 0.002234234332718188, "clip_ratio/high_mean": 0.000917071706680872, "clip_ratio/low_mean": 0.0006827858360338723, "clip_ratio/low_min": 3.87512245652033e-05, "clip_ratio/region_mean": 0.0015998575545381755, "epoch": 3.4384839650145773, "grad_norm": 0.11871284246444702, "learning_rate": 5e-07, "loss": -0.0139, "step": 335 }, { "clip_ratio/high_max": 0.0017428114952053875, "clip_ratio/high_mean": 0.0007322880155697931, "clip_ratio/low_mean": 0.0008577596508985152, "clip_ratio/low_min": 2.459661482134834e-05, "clip_ratio/region_mean": 0.0015900476391834673, "epoch": 3.4478134110787173, "grad_norm": 0.11728209257125854, "learning_rate": 5e-07, "loss": -0.0012, "step": 336 }, { "clip_ratio/high_max": 0.002270193195727188, "clip_ratio/high_mean": 0.0009088232782232808, "clip_ratio/low_mean": 0.0007996909898793092, "clip_ratio/low_min": 2.4988659788505174e-05, "clip_ratio/region_mean": 0.0017085142462747172, "epoch": 3.4571428571428573, "grad_norm": 0.12605318427085876, "learning_rate": 5e-07, "loss": -0.0138, "step": 337 }, { "clip_ratio/high_max": 0.001840306278609205, "clip_ratio/high_mean": 0.0008236933754233178, "clip_ratio/low_mean": 0.0007291474694284261, "clip_ratio/low_min": 1.5562749467790127e-05, "clip_ratio/region_mean": 0.001552840883960016, "epoch": 3.466472303206997, "grad_norm": 0.11183943599462509, "learning_rate": 5e-07, "loss": -0.0212, "step": 338 }, { "clip_ratio/high_max": 0.0021740461961599067, "clip_ratio/high_mean": 0.0009422713719686726, "clip_ratio/low_mean": 0.0009807042661122978, "clip_ratio/low_min": 6.190596286614891e-05, "clip_ratio/region_mean": 0.001922975636261981, "epoch": 3.4758017492711373, "grad_norm": 0.13174642622470856, "learning_rate": 5e-07, "loss": 0.0264, "step": 339 }, { "clip_ratio/high_max": 0.0019181189563823864, "clip_ratio/high_mean": 0.0008454526450805133, "clip_ratio/low_mean": 0.0009118616580963135, "clip_ratio/low_min": 0.00011563619500520872, "clip_ratio/region_mean": 0.0017573143122717738, "epoch": 3.485131195335277, "grad_norm": 0.13170062005519867, "learning_rate": 5e-07, "loss": 0.0657, "step": 340 }, { "clip_ratio/high_max": 0.0021356416582420934, "clip_ratio/high_mean": 0.0008588783348386642, "clip_ratio/low_mean": 0.0009097289512283169, "clip_ratio/low_min": 0.00010830905102920951, "clip_ratio/region_mean": 0.001768607304256875, "epoch": 3.494460641399417, "grad_norm": 0.1332024335861206, "learning_rate": 5e-07, "loss": 0.0055, "step": 341 }, { "clip_ratio/high_max": 0.001853825739090098, "clip_ratio/high_mean": 0.0006881113367853686, "clip_ratio/low_mean": 0.000879488648934057, "clip_ratio/low_min": 4.668258952733595e-05, "clip_ratio/region_mean": 0.0015675999893574044, "epoch": 3.503790087463557, "grad_norm": 0.12436844408512115, "learning_rate": 5e-07, "loss": 0.0824, "step": 342 }, { "clip_ratio/high_max": 0.0021139777563803364, "clip_ratio/high_mean": 0.0009053915273398161, "clip_ratio/low_mean": 0.0007728772197879152, "clip_ratio/low_min": 6.412805669242516e-05, "clip_ratio/region_mean": 0.0016782687489467207, "epoch": 3.513119533527697, "grad_norm": 0.11591131240129471, "learning_rate": 5e-07, "loss": -0.0171, "step": 343 }, { "clip_ratio/high_max": 0.0019010339601663873, "clip_ratio/high_mean": 0.0008148813594743842, "clip_ratio/low_mean": 0.0008298849406855879, "clip_ratio/low_min": 1.949469697137829e-05, "clip_ratio/region_mean": 0.0016447662783320993, "epoch": 3.522448979591837, "grad_norm": 0.11431508511304855, "learning_rate": 5e-07, "loss": 0.0079, "step": 344 }, { "clip_ratio/high_max": 0.002124819438904524, "clip_ratio/high_mean": 0.0008864657138474286, "clip_ratio/low_mean": 0.0010054249796667136, "clip_ratio/low_min": 3.160443975502858e-05, "clip_ratio/region_mean": 0.0018918906716862693, "epoch": 3.5317784256559768, "grad_norm": 0.13355807960033417, "learning_rate": 5e-07, "loss": 0.0036, "step": 345 }, { "clip_ratio/high_max": 0.0021399519027909264, "clip_ratio/high_mean": 0.0009347788509330712, "clip_ratio/low_mean": 0.0008155976101988927, "clip_ratio/low_min": 5.9056745158159174e-05, "clip_ratio/region_mean": 0.0017503765047877096, "epoch": 3.5411078717201168, "grad_norm": 0.11793224513530731, "learning_rate": 5e-07, "loss": -0.0416, "step": 346 }, { "clip_ratio/high_max": 0.002157255970814731, "clip_ratio/high_mean": 0.0008851793136273045, "clip_ratio/low_mean": 0.0009679566355771385, "clip_ratio/low_min": 6.654105345660355e-05, "clip_ratio/region_mean": 0.0018531359819462523, "epoch": 3.5504373177842563, "grad_norm": 0.12057925015687943, "learning_rate": 5e-07, "loss": -0.009, "step": 347 }, { "clip_ratio/high_max": 0.001630258142540697, "clip_ratio/high_mean": 0.0006601957484235754, "clip_ratio/low_mean": 0.0009120813338086009, "clip_ratio/low_min": 5.98557917328435e-05, "clip_ratio/region_mean": 0.0015722770949651022, "epoch": 3.5597667638483967, "grad_norm": 0.13221552968025208, "learning_rate": 5e-07, "loss": 0.055, "step": 348 }, { "clip_ratio/high_max": 0.0020650588485295884, "clip_ratio/high_mean": 0.0008787066180957481, "clip_ratio/low_mean": 0.0009819355873332825, "clip_ratio/low_min": 0.00010789096177177271, "clip_ratio/region_mean": 0.0018606422308948822, "epoch": 3.5690962099125363, "grad_norm": 0.130471333861351, "learning_rate": 5e-07, "loss": 0.0069, "step": 349 }, { "clip_ratio/high_max": 0.0018186291054007597, "clip_ratio/high_mean": 0.0007991519560164306, "clip_ratio/low_mean": 0.0010718338016886264, "clip_ratio/low_min": 8.015417006390635e-05, "clip_ratio/region_mean": 0.0018709857322392054, "epoch": 3.5784256559766763, "grad_norm": 0.13457126915454865, "learning_rate": 5e-07, "loss": 0.0129, "step": 350 }, { "clip_ratio/high_max": 0.0017924010644492228, "clip_ratio/high_mean": 0.0007403675135719823, "clip_ratio/low_mean": 0.0009247028665413382, "clip_ratio/low_min": 8.021497615118278e-05, "clip_ratio/region_mean": 0.001665070405579172, "epoch": 3.5877551020408163, "grad_norm": 0.13297319412231445, "learning_rate": 5e-07, "loss": -0.0098, "step": 351 }, { "clip_ratio/high_max": 0.0018931981030618772, "clip_ratio/high_mean": 0.0008630724769318476, "clip_ratio/low_mean": 0.0010399675084045157, "clip_ratio/low_min": 6.0625371588685084e-05, "clip_ratio/region_mean": 0.0019030399998882785, "epoch": 3.5970845481049563, "grad_norm": 0.12198097258806229, "learning_rate": 5e-07, "loss": -0.0126, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017717633928571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 631.9130859375, "completions/mean_terminated_length": 569.4306030273438, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 3.6064139941690962, "grad_norm": 0.11928443610668182, "learning_rate": 5e-07, "loss": -0.0108, "num_tokens": 225861094.0, "reward": 0.5849958658218384, "reward_std": 0.19903841614723206, "rewards/simpleverify_reward/mean": 0.5849958062171936, "rewards/simpleverify_reward/std": 0.49273136258125305, "step": 353 }, { "clip_ratio/high_max": 0.0019305525056552142, "clip_ratio/high_mean": 0.0008242950698331697, "clip_ratio/low_mean": 0.0005572140562435379, "clip_ratio/low_min": 3.6715445276058745e-05, "clip_ratio/region_mean": 0.001381509136990644, "epoch": 3.6157434402332362, "grad_norm": 0.11775721609592438, "learning_rate": 5e-07, "loss": 0.007, "step": 354 }, { "clip_ratio/high_max": 0.002076564902381506, "clip_ratio/high_mean": 0.0009019760982482694, "clip_ratio/low_mean": 0.0006155341998237418, "clip_ratio/low_min": 3.7341129427659325e-05, "clip_ratio/region_mean": 0.0015175102780631278, "epoch": 3.6250728862973762, "grad_norm": 0.12979008257389069, "learning_rate": 5e-07, "loss": -0.0051, "step": 355 }, { "clip_ratio/high_max": 0.0019458307288005017, "clip_ratio/high_mean": 0.0008034484599193092, "clip_ratio/low_mean": 0.0006040116568328813, "clip_ratio/low_min": 2.4781918909866363e-05, "clip_ratio/region_mean": 0.001407460127666127, "epoch": 3.6344023323615158, "grad_norm": 0.13516417145729065, "learning_rate": 5e-07, "loss": 0.0217, "step": 356 }, { "clip_ratio/high_max": 0.0018403690919512883, "clip_ratio/high_mean": 0.0007897053837950807, "clip_ratio/low_mean": 0.0006777603475711658, "clip_ratio/low_min": 2.4734103135415353e-05, "clip_ratio/region_mean": 0.0014674657795694657, "epoch": 3.643731778425656, "grad_norm": 0.12091835588216782, "learning_rate": 5e-07, "loss": -0.0018, "step": 357 }, { "clip_ratio/high_max": 0.0021220461567281745, "clip_ratio/high_mean": 0.0009635672613512725, "clip_ratio/low_mean": 0.0006129263401817298, "clip_ratio/low_min": 2.479883005435113e-05, "clip_ratio/region_mean": 0.001576493596076034, "epoch": 3.6530612244897958, "grad_norm": 0.14149075746536255, "learning_rate": 5e-07, "loss": -0.007, "step": 358 }, { "clip_ratio/high_max": 0.002031147880188655, "clip_ratio/high_mean": 0.0008843454543239204, "clip_ratio/low_mean": 0.0006070802974136313, "clip_ratio/low_min": 1.3557483725890052e-05, "clip_ratio/region_mean": 0.0014914257226337213, "epoch": 3.6623906705539357, "grad_norm": 0.11876802146434784, "learning_rate": 5e-07, "loss": 0.005, "step": 359 }, { "clip_ratio/high_max": 0.002137131115887314, "clip_ratio/high_mean": 0.0008752768299018499, "clip_ratio/low_mean": 0.0007290048288268736, "clip_ratio/low_min": 0.00015400576285173884, "clip_ratio/region_mean": 0.0016042816605477128, "epoch": 3.6717201166180757, "grad_norm": 0.12826190888881683, "learning_rate": 5e-07, "loss": 0.0385, "step": 360 }, { "clip_ratio/high_max": 0.0017280116080655716, "clip_ratio/high_mean": 0.0007956444933370221, "clip_ratio/low_mean": 0.0006498703496617964, "clip_ratio/low_min": 6.255895277718082e-05, "clip_ratio/region_mean": 0.0014455148593697231, "epoch": 3.6810495626822157, "grad_norm": 0.11923553049564362, "learning_rate": 5e-07, "loss": -0.0127, "step": 361 }, { "clip_ratio/high_max": 0.002307959664904047, "clip_ratio/high_mean": 0.000979014335825923, "clip_ratio/low_mean": 0.0006801744784752373, "clip_ratio/low_min": 6.912057779118186e-05, "clip_ratio/region_mean": 0.001659188790654298, "epoch": 3.6903790087463557, "grad_norm": 0.12617874145507812, "learning_rate": 5e-07, "loss": -0.0191, "step": 362 }, { "clip_ratio/high_max": 0.002422151512291748, "clip_ratio/high_mean": 0.000986516519333236, "clip_ratio/low_mean": 0.0005695013751392253, "clip_ratio/low_min": 9.937986760633066e-06, "clip_ratio/region_mean": 0.0015560179017484188, "epoch": 3.6997084548104957, "grad_norm": 0.12235263735055923, "learning_rate": 5e-07, "loss": -0.0183, "step": 363 }, { "clip_ratio/high_max": 0.0021195328426983906, "clip_ratio/high_mean": 0.0008152358695951989, "clip_ratio/low_mean": 0.0006433533781091683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014585892386094201, "epoch": 3.7090379008746357, "grad_norm": 0.11789847910404205, "learning_rate": 5e-07, "loss": 0.0242, "step": 364 }, { "clip_ratio/high_max": 0.0019479552065604366, "clip_ratio/high_mean": 0.0007642410400876543, "clip_ratio/low_mean": 0.0007234865079226438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001487727538915351, "epoch": 3.7183673469387752, "grad_norm": 0.12926769256591797, "learning_rate": 5e-07, "loss": 0.0018, "step": 365 }, { "clip_ratio/high_max": 0.0018199267105956096, "clip_ratio/high_mean": 0.000879983912454918, "clip_ratio/low_mean": 0.0007258042805915466, "clip_ratio/low_min": 3.0490757126244716e-05, "clip_ratio/region_mean": 0.0016057881912274752, "epoch": 3.7276967930029157, "grad_norm": 0.12348343431949615, "learning_rate": 5e-07, "loss": -0.0314, "step": 366 }, { "clip_ratio/high_max": 0.0020350202394183725, "clip_ratio/high_mean": 0.0008440347392024705, "clip_ratio/low_mean": 0.0007340199535974534, "clip_ratio/low_min": 2.8115786335547455e-05, "clip_ratio/region_mean": 0.001578054692799924, "epoch": 3.7370262390670552, "grad_norm": 0.1180219054222107, "learning_rate": 5e-07, "loss": 0.0027, "step": 367 }, { "clip_ratio/high_max": 0.0023137333555496298, "clip_ratio/high_mean": 0.0009648601517255884, "clip_ratio/low_mean": 0.0007374309279839508, "clip_ratio/low_min": 1.4272664884629194e-05, "clip_ratio/region_mean": 0.0017022910760715604, "epoch": 3.746355685131195, "grad_norm": 0.11982972919940948, "learning_rate": 5e-07, "loss": -0.0353, "step": 368 }, { "clip_ratio/high_max": 0.0018885291101469193, "clip_ratio/high_mean": 0.0008487308386975201, "clip_ratio/low_mean": 0.0008506225713063031, "clip_ratio/low_min": 0.00012365418115223292, "clip_ratio/region_mean": 0.0016993534154607914, "epoch": 3.755685131195335, "grad_norm": 0.12584300339221954, "learning_rate": 5e-07, "loss": 0.0015, "step": 369 }, { "clip_ratio/high_max": 0.002281102912093047, "clip_ratio/high_mean": 0.0009468297066632658, "clip_ratio/low_mean": 0.0008614163252786966, "clip_ratio/low_min": 0.00010211167045781622, "clip_ratio/region_mean": 0.0018082460519508459, "epoch": 3.765014577259475, "grad_norm": 0.12293557077646255, "learning_rate": 5e-07, "loss": -0.0036, "step": 370 }, { "clip_ratio/high_max": 0.002334260891075246, "clip_ratio/high_mean": 0.0009018013333843555, "clip_ratio/low_mean": 0.0008469931290164823, "clip_ratio/low_min": 0.00010759298857010435, "clip_ratio/region_mean": 0.001748794449667912, "epoch": 3.774344023323615, "grad_norm": 0.12434820085763931, "learning_rate": 5e-07, "loss": -0.0104, "step": 371 }, { "clip_ratio/high_max": 0.0022176484962983523, "clip_ratio/high_mean": 0.0008415376760240179, "clip_ratio/low_mean": 0.0007843216662877239, "clip_ratio/low_min": 1.4191644368111156e-05, "clip_ratio/region_mean": 0.0016258593095699325, "epoch": 3.783673469387755, "grad_norm": 0.11780090630054474, "learning_rate": 5e-07, "loss": -0.0065, "step": 372 }, { "clip_ratio/high_max": 0.0018911586703325156, "clip_ratio/high_mean": 0.0008378295879083453, "clip_ratio/low_mean": 0.0007639497398486128, "clip_ratio/low_min": 1.6438716556876898e-05, "clip_ratio/region_mean": 0.0016017793532228097, "epoch": 3.793002915451895, "grad_norm": 0.1273912638425827, "learning_rate": 5e-07, "loss": -0.0119, "step": 373 }, { "clip_ratio/high_max": 0.0018821691264747642, "clip_ratio/high_mean": 0.0008130887799779885, "clip_ratio/low_mean": 0.0009894224858726375, "clip_ratio/low_min": 7.191503482317785e-05, "clip_ratio/region_mean": 0.001802511302230414, "epoch": 3.8023323615160347, "grad_norm": 0.12411832809448242, "learning_rate": 5e-07, "loss": 0.0185, "step": 374 }, { "clip_ratio/high_max": 0.0021663649422407616, "clip_ratio/high_mean": 0.0009730844958539819, "clip_ratio/low_mean": 0.0009986877266783267, "clip_ratio/low_min": 0.00011403044663893525, "clip_ratio/region_mean": 0.0019717722170753404, "epoch": 3.811661807580175, "grad_norm": 0.11757338047027588, "learning_rate": 5e-07, "loss": 0.0192, "step": 375 }, { "clip_ratio/high_max": 0.002128038126102183, "clip_ratio/high_mean": 0.0009241248408216052, "clip_ratio/low_mean": 0.0009170730209007161, "clip_ratio/low_min": 0.00012611063448275672, "clip_ratio/region_mean": 0.0018411978526273742, "epoch": 3.8209912536443147, "grad_norm": 0.11443732678890228, "learning_rate": 5e-07, "loss": 0.0028, "step": 376 }, { "clip_ratio/high_max": 0.002079549871268682, "clip_ratio/high_mean": 0.0008368410562979989, "clip_ratio/low_mean": 0.0010071184988191817, "clip_ratio/low_min": 6.149287310108775e-05, "clip_ratio/region_mean": 0.0018439595878589898, "epoch": 3.8303206997084547, "grad_norm": 0.12175998091697693, "learning_rate": 5e-07, "loss": 0.0092, "step": 377 }, { "clip_ratio/high_max": 0.0019666573280119337, "clip_ratio/high_mean": 0.0008210823307308601, "clip_ratio/low_mean": 0.0009484965685260249, "clip_ratio/low_min": 0.00010868925892282277, "clip_ratio/region_mean": 0.0017695788847049698, "epoch": 3.8396501457725947, "grad_norm": 0.11610057204961777, "learning_rate": 5e-07, "loss": 0.0244, "step": 378 }, { "clip_ratio/high_max": 0.002094651681545656, "clip_ratio/high_mean": 0.0008722258953639539, "clip_ratio/low_mean": 0.0009956824287655763, "clip_ratio/low_min": 7.186613402154762e-05, "clip_ratio/region_mean": 0.0018679083223105408, "epoch": 3.8489795918367347, "grad_norm": 0.1264968365430832, "learning_rate": 5e-07, "loss": 0.0079, "step": 379 }, { "clip_ratio/high_max": 0.0022572985690203495, "clip_ratio/high_mean": 0.0009901694174914155, "clip_ratio/low_mean": 0.0009582082930137403, "clip_ratio/low_min": 5.6353830586886033e-05, "clip_ratio/region_mean": 0.0019483776850393042, "epoch": 3.8583090379008746, "grad_norm": 0.12398642301559448, "learning_rate": 5e-07, "loss": 0.0036, "step": 380 }, { "clip_ratio/high_max": 0.0021242410330160055, "clip_ratio/high_mean": 0.0008417691478825873, "clip_ratio/low_mean": 0.0010636501319822855, "clip_ratio/low_min": 0.00010761041357909562, "clip_ratio/region_mean": 0.0019054192525800318, "epoch": 3.8676384839650146, "grad_norm": 0.1181425079703331, "learning_rate": 5e-07, "loss": 0.0389, "step": 381 }, { "clip_ratio/high_max": 0.0019741140204132535, "clip_ratio/high_mean": 0.0008833259835228091, "clip_ratio/low_mean": 0.0010153608109249035, "clip_ratio/low_min": 8.028122829273343e-05, "clip_ratio/region_mean": 0.0018986867362400517, "epoch": 3.8769679300291546, "grad_norm": 0.11669744551181793, "learning_rate": 5e-07, "loss": 0.0293, "step": 382 }, { "clip_ratio/high_max": 0.0019939444100600667, "clip_ratio/high_mean": 0.0008327972263941774, "clip_ratio/low_mean": 0.0008153222643159097, "clip_ratio/low_min": 9.090464391192654e-05, "clip_ratio/region_mean": 0.0016481195052620023, "epoch": 3.8862973760932946, "grad_norm": 0.12800665199756622, "learning_rate": 5e-07, "loss": -0.0083, "step": 383 }, { "clip_ratio/high_max": 0.0019197950823581778, "clip_ratio/high_mean": 0.0008554133110010298, "clip_ratio/low_mean": 0.0008303146460093558, "clip_ratio/low_min": 5.948393572907662e-05, "clip_ratio/region_mean": 0.001685727977019269, "epoch": 3.8956268221574346, "grad_norm": 0.12684711813926697, "learning_rate": 5e-07, "loss": 0.0139, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0211704799107143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 650.4197998046875, "completions/mean_terminated_length": 575.8975219726562, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 4.0093294460641395, "grad_norm": 0.12339719384908676, "learning_rate": 5e-07, "loss": 0.0326, "num_tokens": 244879386.0, "reward": 0.576729953289032, "reward_std": 0.2039199322462082, "rewards/simpleverify_reward/mean": 0.5767298936843872, "rewards/simpleverify_reward/std": 0.49408602714538574, "step": 385 }, { "clip_ratio/high_max": 0.0018937842069135513, "clip_ratio/high_mean": 0.0008142235565173905, "clip_ratio/low_mean": 0.0005739950447605224, "clip_ratio/low_min": 1.047778732754523e-05, "clip_ratio/region_mean": 0.0013882186030969024, "epoch": 4.01865889212828, "grad_norm": 0.12093694508075714, "learning_rate": 5e-07, "loss": 0.0084, "step": 386 }, { "clip_ratio/high_max": 0.0021320479427231476, "clip_ratio/high_mean": 0.0008182114625014947, "clip_ratio/low_mean": 0.0004729626980406465, "clip_ratio/low_min": 1.0383784683654085e-05, "clip_ratio/region_mean": 0.0012911741541756783, "epoch": 4.0279883381924195, "grad_norm": 0.1283557415008545, "learning_rate": 5e-07, "loss": 0.0023, "step": 387 }, { "clip_ratio/high_max": 0.0021084989930386655, "clip_ratio/high_mean": 0.000914565989660332, "clip_ratio/low_mean": 0.0005386485727285617, "clip_ratio/low_min": 3.7439793231897056e-05, "clip_ratio/region_mean": 0.001453214543289505, "epoch": 4.03731778425656, "grad_norm": 0.11660812795162201, "learning_rate": 5e-07, "loss": -0.0145, "step": 388 }, { "clip_ratio/high_max": 0.0019984650607511867, "clip_ratio/high_mean": 0.0008034166967263445, "clip_ratio/low_mean": 0.0006448092444770737, "clip_ratio/low_min": 1.7882690372061916e-05, "clip_ratio/region_mean": 0.0014482259430224076, "epoch": 4.0466472303206995, "grad_norm": 0.11109878122806549, "learning_rate": 5e-07, "loss": 0.0089, "step": 389 }, { "clip_ratio/high_max": 0.00188700381477247, "clip_ratio/high_mean": 0.0006570268524228595, "clip_ratio/low_mean": 0.000668877150019398, "clip_ratio/low_min": 0.0001018845068756491, "clip_ratio/region_mean": 0.0013259039697004482, "epoch": 4.05597667638484, "grad_norm": 0.10944829881191254, "learning_rate": 5e-07, "loss": 0.0556, "step": 390 }, { "clip_ratio/high_max": 0.0019075073214480653, "clip_ratio/high_mean": 0.00095739064636291, "clip_ratio/low_mean": 0.0006881902627355885, "clip_ratio/low_min": 6.910313641128596e-05, "clip_ratio/region_mean": 0.0016455808909086045, "epoch": 4.0653061224489795, "grad_norm": 0.12819761037826538, "learning_rate": 5e-07, "loss": -0.0144, "step": 391 }, { "clip_ratio/high_max": 0.002081131169688888, "clip_ratio/high_mean": 0.0008804531553323613, "clip_ratio/low_mean": 0.0006155496676001349, "clip_ratio/low_min": 3.8253943785093725e-05, "clip_ratio/region_mean": 0.0014960028383939061, "epoch": 4.07463556851312, "grad_norm": 0.12790217995643616, "learning_rate": 5e-07, "loss": -0.016, "step": 392 }, { "clip_ratio/high_max": 0.0023204203389468603, "clip_ratio/high_mean": 0.000939099547395017, "clip_ratio/low_mean": 0.0007122346214600839, "clip_ratio/low_min": 3.455031674093334e-05, "clip_ratio/region_mean": 0.0016513341543031856, "epoch": 4.0839650145772595, "grad_norm": 0.12658055126667023, "learning_rate": 5e-07, "loss": -0.0014, "step": 393 }, { "clip_ratio/high_max": 0.0016877960988495033, "clip_ratio/high_mean": 0.0007960842112879618, "clip_ratio/low_mean": 0.0006443353577196831, "clip_ratio/low_min": 2.7771606255555525e-05, "clip_ratio/region_mean": 0.001440419571736129, "epoch": 4.093294460641399, "grad_norm": 0.12540622055530548, "learning_rate": 5e-07, "loss": -0.0099, "step": 394 }, { "clip_ratio/high_max": 0.0021207013123785146, "clip_ratio/high_mean": 0.0008546627650503069, "clip_ratio/low_mean": 0.0005781586496595992, "clip_ratio/low_min": 5.100186535855755e-05, "clip_ratio/region_mean": 0.0014328214019769803, "epoch": 4.1026239067055394, "grad_norm": 0.13325974345207214, "learning_rate": 5e-07, "loss": 0.0035, "step": 395 }, { "clip_ratio/high_max": 0.0019604338594945148, "clip_ratio/high_mean": 0.0008375379256904125, "clip_ratio/low_mean": 0.0007282993337867083, "clip_ratio/low_min": 9.368208793603117e-05, "clip_ratio/region_mean": 0.0015658372685720678, "epoch": 4.111953352769679, "grad_norm": 0.13563032448291779, "learning_rate": 5e-07, "loss": 0.0159, "step": 396 }, { "clip_ratio/high_max": 0.0019057239915127866, "clip_ratio/high_mean": 0.0008503669578203699, "clip_ratio/low_mean": 0.0006465872920671245, "clip_ratio/low_min": 6.680366823275108e-05, "clip_ratio/region_mean": 0.001496954275353346, "epoch": 4.121282798833819, "grad_norm": 0.12375207990407944, "learning_rate": 5e-07, "loss": -0.0024, "step": 397 }, { "clip_ratio/high_max": 0.002086859032715438, "clip_ratio/high_mean": 0.0009338541858596727, "clip_ratio/low_mean": 0.00073888362658181, "clip_ratio/low_min": 8.516917296219617e-05, "clip_ratio/region_mean": 0.0016727377733332105, "epoch": 4.130612244897959, "grad_norm": 0.11645853519439697, "learning_rate": 5e-07, "loss": -0.0009, "step": 398 }, { "clip_ratio/high_max": 0.0019703733196365647, "clip_ratio/high_mean": 0.0008069401137618115, "clip_ratio/low_mean": 0.0007055267687974265, "clip_ratio/low_min": 1.9342757695994806e-05, "clip_ratio/region_mean": 0.0015124668680073228, "epoch": 4.139941690962099, "grad_norm": 0.12215206772089005, "learning_rate": 5e-07, "loss": 0.0044, "step": 399 }, { "clip_ratio/high_max": 0.001976637911866419, "clip_ratio/high_mean": 0.0008898580163076986, "clip_ratio/low_mean": 0.0008659603063279064, "clip_ratio/low_min": 8.483826968586072e-05, "clip_ratio/region_mean": 0.00175581836811034, "epoch": 4.149271137026239, "grad_norm": 0.1243974044919014, "learning_rate": 5e-07, "loss": 0.0163, "step": 400 }, { "clip_ratio/high_max": 0.0019773305393755436, "clip_ratio/high_mean": 0.000821938032459002, "clip_ratio/low_mean": 0.0008011992140382063, "clip_ratio/low_min": 5.66508851989056e-05, "clip_ratio/region_mean": 0.0016231372574111447, "epoch": 4.158600583090379, "grad_norm": 0.1100330501794815, "learning_rate": 5e-07, "loss": 0.0093, "step": 401 }, { "clip_ratio/high_max": 0.0021798947636852972, "clip_ratio/high_mean": 0.0009447866814298322, "clip_ratio/low_mean": 0.0008396318571612937, "clip_ratio/low_min": 7.441331035806797e-05, "clip_ratio/region_mean": 0.0017844185640569776, "epoch": 4.167930029154519, "grad_norm": 0.12896180152893066, "learning_rate": 5e-07, "loss": -0.0111, "step": 402 }, { "clip_ratio/high_max": 0.002168082690332085, "clip_ratio/high_mean": 0.0009410784878127743, "clip_ratio/low_mean": 0.0007940596169646597, "clip_ratio/low_min": 5.923482603975572e-05, "clip_ratio/region_mean": 0.0017351380884065293, "epoch": 4.1772594752186585, "grad_norm": 0.12242531031370163, "learning_rate": 5e-07, "loss": -0.0169, "step": 403 }, { "clip_ratio/high_max": 0.002380603225901723, "clip_ratio/high_mean": 0.0010376797727076337, "clip_ratio/low_mean": 0.00094840027486498, "clip_ratio/low_min": 0.0001413376048731152, "clip_ratio/region_mean": 0.0019860800239257514, "epoch": 4.186588921282799, "grad_norm": 0.1407574713230133, "learning_rate": 5e-07, "loss": 0.0006, "step": 404 }, { "clip_ratio/high_max": 0.002196265195379965, "clip_ratio/high_mean": 0.0009004378262034152, "clip_ratio/low_mean": 0.000926824111957103, "clip_ratio/low_min": 8.828777026792523e-05, "clip_ratio/region_mean": 0.0018272619490744546, "epoch": 4.1959183673469385, "grad_norm": 0.11704783886671066, "learning_rate": 5e-07, "loss": -0.0072, "step": 405 }, { "clip_ratio/high_max": 0.002381329220952466, "clip_ratio/high_mean": 0.0010047402302006958, "clip_ratio/low_mean": 0.000851000496368215, "clip_ratio/low_min": 7.24503643141361e-05, "clip_ratio/region_mean": 0.0018557407092885114, "epoch": 4.205247813411079, "grad_norm": 0.12448139488697052, "learning_rate": 5e-07, "loss": -0.0107, "step": 406 }, { "clip_ratio/high_max": 0.0022144693102745805, "clip_ratio/high_mean": 0.0009267043897125404, "clip_ratio/low_mean": 0.00099902824331366, "clip_ratio/low_min": 0.0001037456868289155, "clip_ratio/region_mean": 0.0019257326421211474, "epoch": 4.214577259475218, "grad_norm": 0.12555278837680817, "learning_rate": 5e-07, "loss": 0.0229, "step": 407 }, { "clip_ratio/high_max": 0.0021390679758042097, "clip_ratio/high_mean": 0.000996342530925176, "clip_ratio/low_mean": 0.0009928098897944437, "clip_ratio/low_min": 0.00010706675129767973, "clip_ratio/region_mean": 0.0019891524498234503, "epoch": 4.223906705539359, "grad_norm": 0.12399319559335709, "learning_rate": 5e-07, "loss": -0.0083, "step": 408 }, { "clip_ratio/high_max": 0.0020765656445291825, "clip_ratio/high_mean": 0.0008954633412940893, "clip_ratio/low_mean": 0.0009185060425807023, "clip_ratio/low_min": 0.00013437840425467584, "clip_ratio/region_mean": 0.0018139693638659082, "epoch": 4.233236151603498, "grad_norm": 0.11642805486917496, "learning_rate": 5e-07, "loss": -0.0525, "step": 409 }, { "clip_ratio/high_max": 0.0021404800172604155, "clip_ratio/high_mean": 0.0008580854901083512, "clip_ratio/low_mean": 0.0010211738972429885, "clip_ratio/low_min": 7.784242825437104e-05, "clip_ratio/region_mean": 0.0018792593837133609, "epoch": 4.242565597667639, "grad_norm": 0.1174674853682518, "learning_rate": 5e-07, "loss": 0.0417, "step": 410 }, { "clip_ratio/high_max": 0.0019296939062769525, "clip_ratio/high_mean": 0.0008298166621898417, "clip_ratio/low_mean": 0.001032624135405058, "clip_ratio/low_min": 4.268618704372784e-05, "clip_ratio/region_mean": 0.0018624408039613627, "epoch": 4.251895043731778, "grad_norm": 0.11283983290195465, "learning_rate": 5e-07, "loss": 0.0216, "step": 411 }, { "clip_ratio/high_max": 0.001995379920117557, "clip_ratio/high_mean": 0.0009291774294979405, "clip_ratio/low_mean": 0.0007891522982390597, "clip_ratio/low_min": 6.600019605684793e-05, "clip_ratio/region_mean": 0.0017183297168230638, "epoch": 4.261224489795918, "grad_norm": 0.12374062836170197, "learning_rate": 5e-07, "loss": -0.0179, "step": 412 }, { "clip_ratio/high_max": 0.0021152625558897853, "clip_ratio/high_mean": 0.0008658350252517266, "clip_ratio/low_mean": 0.0009613567581254756, "clip_ratio/low_min": 2.9488086511264555e-05, "clip_ratio/region_mean": 0.0018271917651873082, "epoch": 4.270553935860058, "grad_norm": 0.12893056869506836, "learning_rate": 5e-07, "loss": -0.0062, "step": 413 }, { "clip_ratio/high_max": 0.0021350187380448915, "clip_ratio/high_mean": 0.0008871963418641826, "clip_ratio/low_mean": 0.0009978523848985787, "clip_ratio/low_min": 0.00013897791177441832, "clip_ratio/region_mean": 0.0018850487103918567, "epoch": 4.279883381924198, "grad_norm": 0.1323830634355545, "learning_rate": 5e-07, "loss": 0.0529, "step": 414 }, { "clip_ratio/high_max": 0.0022283111538854428, "clip_ratio/high_mean": 0.001070211535989074, "clip_ratio/low_mean": 0.0008530865788998199, "clip_ratio/low_min": 1.63612567121163e-05, "clip_ratio/region_mean": 0.001923298135807272, "epoch": 4.289212827988338, "grad_norm": 0.1262178272008896, "learning_rate": 5e-07, "loss": -0.0284, "step": 415 }, { "clip_ratio/high_max": 0.0025965042441384867, "clip_ratio/high_mean": 0.0010047702453448437, "clip_ratio/low_mean": 0.0010274794767610729, "clip_ratio/low_min": 6.467611638072412e-05, "clip_ratio/region_mean": 0.002032249751209747, "epoch": 4.298542274052478, "grad_norm": 0.12956133484840393, "learning_rate": 5e-07, "loss": -0.0232, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 628.3648681640625, "completions/mean_terminated_length": 565.31689453125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.307871720116618, "grad_norm": 0.12778323888778687, "learning_rate": 5e-07, "loss": 0.0047, "num_tokens": 263655502.0, "reward": 0.5949358344078064, "reward_std": 0.1944745033979416, "rewards/simpleverify_reward/mean": 0.5949358344078064, "rewards/simpleverify_reward/std": 0.4909130334854126, "step": 417 }, { "clip_ratio/high_max": 0.0020922366966260597, "clip_ratio/high_mean": 0.0008193765934265684, "clip_ratio/low_mean": 0.0006349528484861366, "clip_ratio/low_min": 4.870771954301745e-05, "clip_ratio/region_mean": 0.001454329387343023, "epoch": 4.317201166180758, "grad_norm": 0.13008491694927216, "learning_rate": 5e-07, "loss": -0.0008, "step": 418 }, { "clip_ratio/high_max": 0.002121485482348362, "clip_ratio/high_mean": 0.0008758043713896768, "clip_ratio/low_mean": 0.0005645754499710165, "clip_ratio/low_min": 5.273597707855515e-05, "clip_ratio/region_mean": 0.0014403798049897887, "epoch": 4.326530612244898, "grad_norm": 0.113673634827137, "learning_rate": 5e-07, "loss": 0.0172, "step": 419 }, { "clip_ratio/high_max": 0.002068107532977592, "clip_ratio/high_mean": 0.0009187957475660369, "clip_ratio/low_mean": 0.0006023537807777757, "clip_ratio/low_min": 3.9833861592342146e-05, "clip_ratio/region_mean": 0.0015211495046969503, "epoch": 4.335860058309038, "grad_norm": 0.13076207041740417, "learning_rate": 5e-07, "loss": 0.0151, "step": 420 }, { "clip_ratio/high_max": 0.0024326413185917772, "clip_ratio/high_mean": 0.0009406317130924435, "clip_ratio/low_mean": 0.00045845456543247565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013990862789796665, "epoch": 4.345189504373177, "grad_norm": 0.12257233262062073, "learning_rate": 5e-07, "loss": -0.0217, "step": 421 }, { "clip_ratio/high_max": 0.002145751379430294, "clip_ratio/high_mean": 0.0009121818748099031, "clip_ratio/low_mean": 0.0005870369404874509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014992188444011845, "epoch": 4.354518950437318, "grad_norm": 0.12296847254037857, "learning_rate": 5e-07, "loss": -0.0241, "step": 422 }, { "clip_ratio/high_max": 0.0021460870011651423, "clip_ratio/high_mean": 0.0007341562486544717, "clip_ratio/low_mean": 0.0005294304219205515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001263586640561698, "epoch": 4.363848396501457, "grad_norm": 0.11589694023132324, "learning_rate": 5e-07, "loss": 0.0006, "step": 423 }, { "clip_ratio/high_max": 0.0018302974676771555, "clip_ratio/high_mean": 0.0007407811335724546, "clip_ratio/low_mean": 0.0006417296890504076, "clip_ratio/low_min": 1.4321723028842825e-05, "clip_ratio/region_mean": 0.0013825108217133675, "epoch": 4.373177842565598, "grad_norm": 0.12973569333553314, "learning_rate": 5e-07, "loss": 0.0266, "step": 424 }, { "clip_ratio/high_max": 0.002162597404094413, "clip_ratio/high_mean": 0.00091067458379257, "clip_ratio/low_mean": 0.0005898400695514283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015005146597104613, "epoch": 4.382507288629737, "grad_norm": 0.1350025087594986, "learning_rate": 5e-07, "loss": -0.0017, "step": 425 }, { "clip_ratio/high_max": 0.002028798004175769, "clip_ratio/high_mean": 0.0008291658741654828, "clip_ratio/low_mean": 0.0006095615826779976, "clip_ratio/low_min": 2.8050353648723103e-05, "clip_ratio/region_mean": 0.001438727464119438, "epoch": 4.391836734693878, "grad_norm": 0.1347055435180664, "learning_rate": 5e-07, "loss": -0.0094, "step": 426 }, { "clip_ratio/high_max": 0.0022811544768046588, "clip_ratio/high_mean": 0.0008949961520556826, "clip_ratio/low_mean": 0.0007068973864079453, "clip_ratio/low_min": 5.059995601186529e-05, "clip_ratio/region_mean": 0.0016018935129977763, "epoch": 4.401166180758017, "grad_norm": 0.12371125817298889, "learning_rate": 5e-07, "loss": -0.0029, "step": 427 }, { "clip_ratio/high_max": 0.001834435544878943, "clip_ratio/high_mean": 0.0008770251370151527, "clip_ratio/low_mean": 0.0006157053921924671, "clip_ratio/low_min": 2.867171679099556e-05, "clip_ratio/region_mean": 0.0014927305201126728, "epoch": 4.410495626822158, "grad_norm": 0.11444864422082901, "learning_rate": 5e-07, "loss": -0.0017, "step": 428 }, { "clip_ratio/high_max": 0.0021408364846138284, "clip_ratio/high_mean": 0.000892818807187723, "clip_ratio/low_mean": 0.0006374448344104167, "clip_ratio/low_min": 1.2176114978501573e-05, "clip_ratio/region_mean": 0.001530263642052887, "epoch": 4.419825072886297, "grad_norm": 0.12551458179950714, "learning_rate": 5e-07, "loss": -0.0122, "step": 429 }, { "clip_ratio/high_max": 0.00210711891122628, "clip_ratio/high_mean": 0.0009307780037488556, "clip_ratio/low_mean": 0.0007500258761865553, "clip_ratio/low_min": 6.269064397201873e-05, "clip_ratio/region_mean": 0.0016808038635645062, "epoch": 4.429154518950437, "grad_norm": 0.13980968296527863, "learning_rate": 5e-07, "loss": 0.0195, "step": 430 }, { "clip_ratio/high_max": 0.0021833953869645484, "clip_ratio/high_mean": 0.0010497144648979884, "clip_ratio/low_mean": 0.0006737910971423844, "clip_ratio/low_min": 6.523335650854278e-05, "clip_ratio/region_mean": 0.0017235055311175529, "epoch": 4.438483965014577, "grad_norm": 0.12422941625118256, "learning_rate": 5e-07, "loss": -0.0542, "step": 431 }, { "clip_ratio/high_max": 0.0021381279730121605, "clip_ratio/high_mean": 0.0009278154120693216, "clip_ratio/low_mean": 0.0007837412413209677, "clip_ratio/low_min": 1.258558222616557e-05, "clip_ratio/region_mean": 0.001711556669761194, "epoch": 4.447813411078717, "grad_norm": 0.1339101344347, "learning_rate": 5e-07, "loss": 0.0089, "step": 432 }, { "clip_ratio/high_max": 0.0022843089391244575, "clip_ratio/high_mean": 0.0009073848377738614, "clip_ratio/low_mean": 0.0008951009749580408, "clip_ratio/low_min": 8.104365315375617e-05, "clip_ratio/region_mean": 0.0018024858291028067, "epoch": 4.457142857142857, "grad_norm": 0.1342819482088089, "learning_rate": 5e-07, "loss": 0.0312, "step": 433 }, { "clip_ratio/high_max": 0.002441441305563785, "clip_ratio/high_mean": 0.0009925134891091147, "clip_ratio/low_mean": 0.0008524071017745882, "clip_ratio/low_min": 4.001995512226131e-05, "clip_ratio/region_mean": 0.0018449206254445016, "epoch": 4.466472303206997, "grad_norm": 0.13498589396476746, "learning_rate": 5e-07, "loss": -0.0032, "step": 434 }, { "clip_ratio/high_max": 0.0019501376991684083, "clip_ratio/high_mean": 0.0008391420196858235, "clip_ratio/low_mean": 0.0008449795732303755, "clip_ratio/low_min": 3.200756236765301e-05, "clip_ratio/region_mean": 0.0016841216129250824, "epoch": 4.475801749271137, "grad_norm": 0.12426140904426575, "learning_rate": 5e-07, "loss": -0.0027, "step": 435 }, { "clip_ratio/high_max": 0.0020382710135891102, "clip_ratio/high_mean": 0.0008557031651434954, "clip_ratio/low_mean": 0.0008224030880228383, "clip_ratio/low_min": 2.4925224352045916e-05, "clip_ratio/region_mean": 0.0016781062622612808, "epoch": 4.485131195335277, "grad_norm": 0.12012751400470734, "learning_rate": 5e-07, "loss": 0.0183, "step": 436 }, { "clip_ratio/high_max": 0.002416578739939723, "clip_ratio/high_mean": 0.000981440141913481, "clip_ratio/low_mean": 0.0006930201871000463, "clip_ratio/low_min": 4.115925639780471e-05, "clip_ratio/region_mean": 0.0016744603854021989, "epoch": 4.494460641399417, "grad_norm": 0.12333562970161438, "learning_rate": 5e-07, "loss": -0.0081, "step": 437 }, { "clip_ratio/high_max": 0.0020750459261762444, "clip_ratio/high_mean": 0.0007916850954643451, "clip_ratio/low_mean": 0.0008410509672103217, "clip_ratio/low_min": 0.00013387454964686185, "clip_ratio/region_mean": 0.0016327360572176985, "epoch": 4.503790087463557, "grad_norm": 0.2522282898426056, "learning_rate": 5e-07, "loss": 0.0776, "step": 438 }, { "clip_ratio/high_max": 0.0021391015143308323, "clip_ratio/high_mean": 0.0008188885603885865, "clip_ratio/low_mean": 0.0007274186464201193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001546307212265674, "epoch": 4.513119533527696, "grad_norm": 0.1308024674654007, "learning_rate": 5e-07, "loss": 0.0034, "step": 439 }, { "clip_ratio/high_max": 0.0020560188713716343, "clip_ratio/high_mean": 0.0008466565413982607, "clip_ratio/low_mean": 0.0008346803533640923, "clip_ratio/low_min": 0.00010397801634098869, "clip_ratio/region_mean": 0.001681336943875067, "epoch": 4.522448979591837, "grad_norm": 0.12734054028987885, "learning_rate": 5e-07, "loss": 0.0181, "step": 440 }, { "clip_ratio/high_max": 0.0023855327199271414, "clip_ratio/high_mean": 0.0009562708301018574, "clip_ratio/low_mean": 0.000717448388968478, "clip_ratio/low_min": 2.115060851792805e-05, "clip_ratio/region_mean": 0.0016737191726861056, "epoch": 4.531778425655976, "grad_norm": 0.12272682040929794, "learning_rate": 5e-07, "loss": -0.0251, "step": 441 }, { "clip_ratio/high_max": 0.0019946314496337436, "clip_ratio/high_mean": 0.0009076805708900793, "clip_ratio/low_mean": 0.0007704850850132061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016781656740931794, "epoch": 4.541107871720117, "grad_norm": 0.11665749549865723, "learning_rate": 5e-07, "loss": 0.0011, "step": 442 }, { "clip_ratio/high_max": 0.0018091297570208553, "clip_ratio/high_mean": 0.0007725462783128023, "clip_ratio/low_mean": 0.000963283146120375, "clip_ratio/low_min": 9.457067608309444e-05, "clip_ratio/region_mean": 0.001735829391691368, "epoch": 4.550437317784256, "grad_norm": 0.13305199146270752, "learning_rate": 5e-07, "loss": 0.0787, "step": 443 }, { "clip_ratio/high_max": 0.002384287683526054, "clip_ratio/high_mean": 0.0009701756207505241, "clip_ratio/low_mean": 0.0008662878753966652, "clip_ratio/low_min": 1.2021542715956457e-05, "clip_ratio/region_mean": 0.0018364635179750621, "epoch": 4.559766763848397, "grad_norm": 0.12892185151576996, "learning_rate": 5e-07, "loss": -0.0202, "step": 444 }, { "clip_ratio/high_max": 0.002182872409321135, "clip_ratio/high_mean": 0.0009753560534591088, "clip_ratio/low_mean": 0.0008268016044894466, "clip_ratio/low_min": 6.609109186683781e-05, "clip_ratio/region_mean": 0.0018021576688624918, "epoch": 4.569096209912536, "grad_norm": 0.12255056202411652, "learning_rate": 5e-07, "loss": -0.0173, "step": 445 }, { "clip_ratio/high_max": 0.0020495330172707327, "clip_ratio/high_mean": 0.0009566917142365128, "clip_ratio/low_mean": 0.0008165123635990312, "clip_ratio/low_min": 3.197610567440279e-05, "clip_ratio/region_mean": 0.0017732040723785758, "epoch": 4.578425655976677, "grad_norm": 0.11784346401691437, "learning_rate": 5e-07, "loss": 0.0123, "step": 446 }, { "clip_ratio/high_max": 0.001986919392948039, "clip_ratio/high_mean": 0.000918127490876941, "clip_ratio/low_mean": 0.0010893595826928504, "clip_ratio/low_min": 0.00015212662401609123, "clip_ratio/region_mean": 0.0020074870699318126, "epoch": 4.587755102040816, "grad_norm": 0.11697184294462204, "learning_rate": 5e-07, "loss": 0.0244, "step": 447 }, { "clip_ratio/high_max": 0.0022593688481720164, "clip_ratio/high_mean": 0.0010086106449307408, "clip_ratio/low_mean": 0.0009170646935672266, "clip_ratio/low_min": 6.035576734575443e-05, "clip_ratio/region_mean": 0.001925675394886639, "epoch": 4.597084548104956, "grad_norm": 0.1251981556415558, "learning_rate": 5e-07, "loss": 0.0146, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020647321428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 638.4129028320312, "completions/mean_terminated_length": 565.5178833007812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 4.606413994169096, "grad_norm": 0.13884030282497406, "learning_rate": 5e-07, "loss": -0.0027, "num_tokens": 282427116.0, "reward": 0.586251437664032, "reward_std": 0.19493626058101654, "rewards/simpleverify_reward/mean": 0.5862513780593872, "rewards/simpleverify_reward/std": 0.49251309037208557, "step": 449 }, { "clip_ratio/high_max": 0.0019630149981821887, "clip_ratio/high_mean": 0.0007966451048559975, "clip_ratio/low_mean": 0.0005298724345266237, "clip_ratio/low_min": 2.7313274586049374e-05, "clip_ratio/region_mean": 0.0013265175512060523, "epoch": 4.615743440233236, "grad_norm": 0.135202556848526, "learning_rate": 5e-07, "loss": 0.0018, "step": 450 }, { "clip_ratio/high_max": 0.001980036307941191, "clip_ratio/high_mean": 0.0007830955855752109, "clip_ratio/low_mean": 0.0005061414713054546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001289237043238245, "epoch": 4.625072886297376, "grad_norm": 0.13277316093444824, "learning_rate": 5e-07, "loss": 0.0007, "step": 451 }, { "clip_ratio/high_max": 0.0019350623988430016, "clip_ratio/high_mean": 0.0008263289964816067, "clip_ratio/low_mean": 0.0005811457831441658, "clip_ratio/low_min": 2.9384123990894295e-05, "clip_ratio/region_mean": 0.0014074747741688043, "epoch": 4.634402332361516, "grad_norm": 0.13163484632968903, "learning_rate": 5e-07, "loss": 0.0134, "step": 452 }, { "clip_ratio/high_max": 0.002019974657741841, "clip_ratio/high_mean": 0.000832763900689315, "clip_ratio/low_mean": 0.0005304867936501978, "clip_ratio/low_min": 1.2033115126541816e-05, "clip_ratio/region_mean": 0.0013632506961585023, "epoch": 4.643731778425656, "grad_norm": 0.1309274435043335, "learning_rate": 5e-07, "loss": -0.0139, "step": 453 }, { "clip_ratio/high_max": 0.002475005261658225, "clip_ratio/high_mean": 0.0008769523446972016, "clip_ratio/low_mean": 0.0005545726344280411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014315249791252427, "epoch": 4.653061224489796, "grad_norm": 0.1312720775604248, "learning_rate": 5e-07, "loss": 0.0348, "step": 454 }, { "clip_ratio/high_max": 0.002188902726629749, "clip_ratio/high_mean": 0.0009879598092084052, "clip_ratio/low_mean": 0.0007283913819264853, "clip_ratio/low_min": 1.629301368666347e-05, "clip_ratio/region_mean": 0.001716351165669039, "epoch": 4.662390670553936, "grad_norm": 0.1249343752861023, "learning_rate": 5e-07, "loss": -0.0175, "step": 455 }, { "clip_ratio/high_max": 0.0019866632101184223, "clip_ratio/high_mean": 0.0008737412172195036, "clip_ratio/low_mean": 0.000581863056140719, "clip_ratio/low_min": 1.3377568393480033e-05, "clip_ratio/region_mean": 0.0014556042879121378, "epoch": 4.671720116618076, "grad_norm": 0.11462453007698059, "learning_rate": 5e-07, "loss": -0.0247, "step": 456 }, { "clip_ratio/high_max": 0.0017965762090170756, "clip_ratio/high_mean": 0.000752511981772841, "clip_ratio/low_mean": 0.0006916727925272426, "clip_ratio/low_min": 3.587002902349923e-05, "clip_ratio/region_mean": 0.0014441847779380623, "epoch": 4.681049562682215, "grad_norm": 0.1294611245393753, "learning_rate": 5e-07, "loss": 0.0147, "step": 457 }, { "clip_ratio/high_max": 0.001954619143361924, "clip_ratio/high_mean": 0.000762293575462536, "clip_ratio/low_mean": 0.0006549922809426789, "clip_ratio/low_min": 2.905184646806447e-05, "clip_ratio/region_mean": 0.001417285850038752, "epoch": 4.690379008746356, "grad_norm": 0.11060868203639984, "learning_rate": 5e-07, "loss": 0.0079, "step": 458 }, { "clip_ratio/high_max": 0.0021997821386321448, "clip_ratio/high_mean": 0.0008632506633148296, "clip_ratio/low_mean": 0.0006803137257520575, "clip_ratio/low_min": 2.7469930500956252e-05, "clip_ratio/region_mean": 0.0015435643872478977, "epoch": 4.699708454810495, "grad_norm": 0.13194335997104645, "learning_rate": 5e-07, "loss": -0.0319, "step": 459 }, { "clip_ratio/high_max": 0.0020686305288108997, "clip_ratio/high_mean": 0.0007673289564991137, "clip_ratio/low_mean": 0.0006706120184389874, "clip_ratio/low_min": 5.815993790747598e-05, "clip_ratio/region_mean": 0.0014379409549292177, "epoch": 4.709037900874636, "grad_norm": 0.12165791541337967, "learning_rate": 5e-07, "loss": -0.0228, "step": 460 }, { "clip_ratio/high_max": 0.002275247337820474, "clip_ratio/high_mean": 0.0008969585342128994, "clip_ratio/low_mean": 0.0006547142120325589, "clip_ratio/low_min": 3.9900647607282735e-05, "clip_ratio/region_mean": 0.0015516727435169742, "epoch": 4.718367346938775, "grad_norm": 0.11859557032585144, "learning_rate": 5e-07, "loss": -0.0211, "step": 461 }, { "clip_ratio/high_max": 0.002375031188421417, "clip_ratio/high_mean": 0.0009282011415052693, "clip_ratio/low_mean": 0.00074022988883371, "clip_ratio/low_min": 2.988601590914186e-05, "clip_ratio/region_mean": 0.0016684310539858416, "epoch": 4.727696793002916, "grad_norm": 0.12684595584869385, "learning_rate": 5e-07, "loss": -0.0304, "step": 462 }, { "clip_ratio/high_max": 0.002100137746310793, "clip_ratio/high_mean": 0.0009202267210639548, "clip_ratio/low_mean": 0.00068907181139366, "clip_ratio/low_min": 3.9949813071871176e-05, "clip_ratio/region_mean": 0.0016092985097202472, "epoch": 4.737026239067055, "grad_norm": 0.13316386938095093, "learning_rate": 5e-07, "loss": 0.0127, "step": 463 }, { "clip_ratio/high_max": 0.0021984396371408366, "clip_ratio/high_mean": 0.0008450617524431436, "clip_ratio/low_mean": 0.0006838976187282242, "clip_ratio/low_min": 4.495206212595804e-05, "clip_ratio/region_mean": 0.0015289593393390533, "epoch": 4.746355685131196, "grad_norm": 0.11983081698417664, "learning_rate": 5e-07, "loss": -0.0087, "step": 464 }, { "clip_ratio/high_max": 0.002141573975677602, "clip_ratio/high_mean": 0.0009485814443905838, "clip_ratio/low_mean": 0.0008795427638688125, "clip_ratio/low_min": 7.669733440707205e-05, "clip_ratio/region_mean": 0.0018281242155353539, "epoch": 4.755685131195335, "grad_norm": 0.12710651755332947, "learning_rate": 5e-07, "loss": -0.0047, "step": 465 }, { "clip_ratio/high_max": 0.0019553861748136114, "clip_ratio/high_mean": 0.0007938556700537447, "clip_ratio/low_mean": 0.0008544841111870483, "clip_ratio/low_min": 4.513207113632234e-05, "clip_ratio/region_mean": 0.0016483398067066446, "epoch": 4.765014577259475, "grad_norm": 0.19662417471408844, "learning_rate": 5e-07, "loss": -0.0206, "step": 466 }, { "clip_ratio/high_max": 0.0021562992042163387, "clip_ratio/high_mean": 0.0008550553957320517, "clip_ratio/low_mean": 0.0010285750322509557, "clip_ratio/low_min": 5.389004036260303e-05, "clip_ratio/region_mean": 0.0018836304734577425, "epoch": 4.774344023323615, "grad_norm": 0.1259915977716446, "learning_rate": 5e-07, "loss": 0.0371, "step": 467 }, { "clip_ratio/high_max": 0.0020953909370291512, "clip_ratio/high_mean": 0.000835101584016229, "clip_ratio/low_mean": 0.0008578447395848343, "clip_ratio/low_min": 3.913655018550344e-05, "clip_ratio/region_mean": 0.0016929463017731905, "epoch": 4.783673469387755, "grad_norm": 0.1271364539861679, "learning_rate": 5e-07, "loss": -0.0008, "step": 468 }, { "clip_ratio/high_max": 0.0020887958889943548, "clip_ratio/high_mean": 0.0009408393207195331, "clip_ratio/low_mean": 0.0008038483392738272, "clip_ratio/low_min": 3.216685217921622e-05, "clip_ratio/region_mean": 0.0017446876590838656, "epoch": 4.793002915451895, "grad_norm": 0.13549822568893433, "learning_rate": 5e-07, "loss": -0.0099, "step": 469 }, { "clip_ratio/high_max": 0.0024922741213231348, "clip_ratio/high_mean": 0.000865196740051033, "clip_ratio/low_mean": 0.0008552013714506757, "clip_ratio/low_min": 1.3551604752137791e-05, "clip_ratio/region_mean": 0.0017203981187776662, "epoch": 4.802332361516035, "grad_norm": 0.1312236785888672, "learning_rate": 5e-07, "loss": 0.0228, "step": 470 }, { "clip_ratio/high_max": 0.0020389831624925137, "clip_ratio/high_mean": 0.0008881742232915713, "clip_ratio/low_mean": 0.0008815491546556586, "clip_ratio/low_min": 5.31316200067522e-05, "clip_ratio/region_mean": 0.00176972337794723, "epoch": 4.811661807580175, "grad_norm": 0.1314009428024292, "learning_rate": 5e-07, "loss": -0.0228, "step": 471 }, { "clip_ratio/high_max": 0.002474597204127349, "clip_ratio/high_mean": 0.0008964410135376966, "clip_ratio/low_mean": 0.0009810764204303268, "clip_ratio/low_min": 9.569949725118931e-05, "clip_ratio/region_mean": 0.001877517490356695, "epoch": 4.820991253644315, "grad_norm": 0.1271851360797882, "learning_rate": 5e-07, "loss": 0.0295, "step": 472 }, { "clip_ratio/high_max": 0.002184415629017167, "clip_ratio/high_mean": 0.000958494478254579, "clip_ratio/low_mean": 0.0009115305729210377, "clip_ratio/low_min": 5.892269837204367e-05, "clip_ratio/region_mean": 0.0018700250730034895, "epoch": 4.830320699708455, "grad_norm": 0.11777620017528534, "learning_rate": 5e-07, "loss": -0.0245, "step": 473 }, { "clip_ratio/high_max": 0.0019927767825720366, "clip_ratio/high_mean": 0.0008435399195150239, "clip_ratio/low_mean": 0.000922443401577766, "clip_ratio/low_min": 8.3709063801507e-05, "clip_ratio/region_mean": 0.0017659832956269383, "epoch": 4.839650145772595, "grad_norm": 0.13246919214725494, "learning_rate": 5e-07, "loss": 0.0082, "step": 474 }, { "clip_ratio/high_max": 0.0018465589892002754, "clip_ratio/high_mean": 0.0007768582454446005, "clip_ratio/low_mean": 0.0009516665159026161, "clip_ratio/low_min": 8.733660979487468e-05, "clip_ratio/region_mean": 0.0017285247813561, "epoch": 4.848979591836734, "grad_norm": 0.12691669166088104, "learning_rate": 5e-07, "loss": 0.0168, "step": 475 }, { "clip_ratio/high_max": 0.001749836839735508, "clip_ratio/high_mean": 0.0007606961353303632, "clip_ratio/low_mean": 0.0009362451273773331, "clip_ratio/low_min": 5.2484141633613035e-05, "clip_ratio/region_mean": 0.0016969412245089188, "epoch": 4.858309037900875, "grad_norm": 0.12853863835334778, "learning_rate": 5e-07, "loss": -0.0054, "step": 476 }, { "clip_ratio/high_max": 0.002105704948917264, "clip_ratio/high_mean": 0.0007867604017519625, "clip_ratio/low_mean": 0.000929565961996559, "clip_ratio/low_min": 7.290719440788962e-05, "clip_ratio/region_mean": 0.0017163263764814474, "epoch": 4.867638483965014, "grad_norm": 0.12546277046203613, "learning_rate": 5e-07, "loss": 0.0235, "step": 477 }, { "clip_ratio/high_max": 0.0026045719205285423, "clip_ratio/high_mean": 0.0009636410577513743, "clip_ratio/low_mean": 0.0009489142739766976, "clip_ratio/low_min": 9.571609462000197e-05, "clip_ratio/region_mean": 0.001912555359012913, "epoch": 4.876967930029155, "grad_norm": 0.143890842795372, "learning_rate": 5e-07, "loss": 0.0019, "step": 478 }, { "clip_ratio/high_max": 0.0019164484256180003, "clip_ratio/high_mean": 0.0008429658028035192, "clip_ratio/low_mean": 0.001013290740957018, "clip_ratio/low_min": 2.9809943043801468e-05, "clip_ratio/region_mean": 0.0018562565455795266, "epoch": 4.886297376093294, "grad_norm": 0.12975412607192993, "learning_rate": 5e-07, "loss": 0.0211, "step": 479 }, { "clip_ratio/high_max": 0.002023090550210327, "clip_ratio/high_mean": 0.0008278234345198143, "clip_ratio/low_mean": 0.0010644006142683793, "clip_ratio/low_min": 7.133808503567707e-05, "clip_ratio/region_mean": 0.0018922240487881936, "epoch": 4.895626822157435, "grad_norm": 0.1249682754278183, "learning_rate": 5e-07, "loss": 0.0368, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0221470424107143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 651.0966796875, "completions/mean_terminated_length": 573.0743408203125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 5.0093294460641395, "grad_norm": 0.14227749407291412, "learning_rate": 5e-07, "loss": 0.0492, "num_tokens": 301320208.0, "reward": 0.5865653157234192, "reward_std": 0.18675154447555542, "rewards/simpleverify_reward/mean": 0.5865653157234192, "rewards/simpleverify_reward/std": 0.4924580454826355, "step": 481 }, { "clip_ratio/high_max": 0.0020721136752399616, "clip_ratio/high_mean": 0.0007697474502492696, "clip_ratio/low_mean": 0.0005560868630709592, "clip_ratio/low_min": 1.5439723938470706e-05, "clip_ratio/region_mean": 0.0013258343351481017, "epoch": 5.01865889212828, "grad_norm": 0.11371316015720367, "learning_rate": 5e-07, "loss": 0.0063, "step": 482 }, { "clip_ratio/high_max": 0.001912576386530418, "clip_ratio/high_mean": 0.0007300738616322633, "clip_ratio/low_mean": 0.0005859073171450291, "clip_ratio/low_min": 9.834775482886471e-06, "clip_ratio/region_mean": 0.0013159811896912288, "epoch": 5.0279883381924195, "grad_norm": 0.12221777439117432, "learning_rate": 5e-07, "loss": -0.0247, "step": 483 }, { "clip_ratio/high_max": 0.001980648969038157, "clip_ratio/high_mean": 0.0008048721374507295, "clip_ratio/low_mean": 0.0005524147036339855, "clip_ratio/low_min": 5.9261624301143456e-05, "clip_ratio/region_mean": 0.0013572868629125878, "epoch": 5.03731778425656, "grad_norm": 0.13951541483402252, "learning_rate": 5e-07, "loss": 0.0249, "step": 484 }, { "clip_ratio/high_max": 0.0016672820820531342, "clip_ratio/high_mean": 0.0006801252347941045, "clip_ratio/low_mean": 0.0005808063415315701, "clip_ratio/low_min": 1.7605634639039636e-05, "clip_ratio/region_mean": 0.0012609316108864732, "epoch": 5.0466472303206995, "grad_norm": 0.1197991818189621, "learning_rate": 5e-07, "loss": 0.025, "step": 485 }, { "clip_ratio/high_max": 0.0018517736680223607, "clip_ratio/high_mean": 0.000795468979049474, "clip_ratio/low_mean": 0.0005832754864059098, "clip_ratio/low_min": 1.9822391550405882e-05, "clip_ratio/region_mean": 0.0013787444913759828, "epoch": 5.05597667638484, "grad_norm": 0.1191205158829689, "learning_rate": 5e-07, "loss": -0.0249, "step": 486 }, { "clip_ratio/high_max": 0.0019517745458870195, "clip_ratio/high_mean": 0.0008000829002412502, "clip_ratio/low_mean": 0.000551348033695831, "clip_ratio/low_min": 1.2552721273095813e-05, "clip_ratio/region_mean": 0.0013514309430320282, "epoch": 5.0653061224489795, "grad_norm": 0.1214328482747078, "learning_rate": 5e-07, "loss": -0.007, "step": 487 }, { "clip_ratio/high_max": 0.001805867846996989, "clip_ratio/high_mean": 0.000846155378894764, "clip_ratio/low_mean": 0.0006290824594543665, "clip_ratio/low_min": 3.312016087875236e-05, "clip_ratio/region_mean": 0.0014752378592675086, "epoch": 5.07463556851312, "grad_norm": 0.13187570869922638, "learning_rate": 5e-07, "loss": -0.0153, "step": 488 }, { "clip_ratio/high_max": 0.0018021935502474662, "clip_ratio/high_mean": 0.0006288247714110184, "clip_ratio/low_mean": 0.0006515828154078918, "clip_ratio/low_min": 8.869003340805648e-05, "clip_ratio/region_mean": 0.0012804075849999208, "epoch": 5.0839650145772595, "grad_norm": 0.12324228882789612, "learning_rate": 5e-07, "loss": 0.0254, "step": 489 }, { "clip_ratio/high_max": 0.0022587066050618887, "clip_ratio/high_mean": 0.0009108105114137288, "clip_ratio/low_mean": 0.0005383617281040642, "clip_ratio/low_min": 4.370366150396876e-05, "clip_ratio/region_mean": 0.001449172184948111, "epoch": 5.093294460641399, "grad_norm": 0.12684619426727295, "learning_rate": 5e-07, "loss": -0.0325, "step": 490 }, { "clip_ratio/high_max": 0.0015641900026821531, "clip_ratio/high_mean": 0.0007337381794059183, "clip_ratio/low_mean": 0.0007827592598914634, "clip_ratio/low_min": 4.0973847717395984e-05, "clip_ratio/region_mean": 0.0015164974392973818, "epoch": 5.1026239067055394, "grad_norm": 0.12216655164957047, "learning_rate": 5e-07, "loss": 0.0156, "step": 491 }, { "clip_ratio/high_max": 0.002160827920306474, "clip_ratio/high_mean": 0.0008584248716942966, "clip_ratio/low_mean": 0.0006362482799886493, "clip_ratio/low_min": 2.666951149876695e-05, "clip_ratio/region_mean": 0.001494673160777893, "epoch": 5.111953352769679, "grad_norm": 0.11905211955308914, "learning_rate": 5e-07, "loss": 0.0166, "step": 492 }, { "clip_ratio/high_max": 0.00209060862835031, "clip_ratio/high_mean": 0.0008688719517522259, "clip_ratio/low_mean": 0.0006697180888295406, "clip_ratio/low_min": 5.078163303551264e-05, "clip_ratio/region_mean": 0.0015385900405817665, "epoch": 5.121282798833819, "grad_norm": 0.12893009185791016, "learning_rate": 5e-07, "loss": 0.008, "step": 493 }, { "clip_ratio/high_max": 0.0019212378465454094, "clip_ratio/high_mean": 0.0007819591810402926, "clip_ratio/low_mean": 0.0006920155465195421, "clip_ratio/low_min": 4.8567559133516625e-05, "clip_ratio/region_mean": 0.0014739747057319619, "epoch": 5.130612244897959, "grad_norm": 0.13720732927322388, "learning_rate": 5e-07, "loss": -0.0044, "step": 494 }, { "clip_ratio/high_max": 0.0019198472691641655, "clip_ratio/high_mean": 0.0007530285474786069, "clip_ratio/low_mean": 0.0007759917752991896, "clip_ratio/low_min": 3.071506034757476e-05, "clip_ratio/region_mean": 0.0015290203373297118, "epoch": 5.139941690962099, "grad_norm": 0.13620974123477936, "learning_rate": 5e-07, "loss": -0.0127, "step": 495 }, { "clip_ratio/high_max": 0.001816354564653011, "clip_ratio/high_mean": 0.0007553374334747787, "clip_ratio/low_mean": 0.0007581180325360037, "clip_ratio/low_min": 6.132876478659455e-05, "clip_ratio/region_mean": 0.001513455448730383, "epoch": 5.149271137026239, "grad_norm": 0.19053654372692108, "learning_rate": 5e-07, "loss": 0.0204, "step": 496 }, { "clip_ratio/high_max": 0.002035702385910554, "clip_ratio/high_mean": 0.0009472122583247256, "clip_ratio/low_mean": 0.0006754170462954789, "clip_ratio/low_min": 2.1851637939107604e-05, "clip_ratio/region_mean": 0.0016226293082581833, "epoch": 5.158600583090379, "grad_norm": 0.11475814878940582, "learning_rate": 5e-07, "loss": -0.018, "step": 497 }, { "clip_ratio/high_max": 0.0018256586845382117, "clip_ratio/high_mean": 0.000824503309559077, "clip_ratio/low_mean": 0.0008365770572709152, "clip_ratio/low_min": 6.023105925123673e-05, "clip_ratio/region_mean": 0.0016610803722869605, "epoch": 5.167930029154519, "grad_norm": 0.1240600198507309, "learning_rate": 5e-07, "loss": 0.0136, "step": 498 }, { "clip_ratio/high_max": 0.001950200901774224, "clip_ratio/high_mean": 0.0008545933815184981, "clip_ratio/low_mean": 0.0008951486652222229, "clip_ratio/low_min": 0.0001318327686021803, "clip_ratio/region_mean": 0.0017497420849394985, "epoch": 5.1772594752186585, "grad_norm": 0.16396501660346985, "learning_rate": 5e-07, "loss": 0.0036, "step": 499 }, { "clip_ratio/high_max": 0.00182327348011313, "clip_ratio/high_mean": 0.0008284535506390966, "clip_ratio/low_mean": 0.0008835345506668091, "clip_ratio/low_min": 5.134160710440483e-05, "clip_ratio/region_mean": 0.0017119881304097362, "epoch": 5.186588921282799, "grad_norm": 0.13015493750572205, "learning_rate": 5e-07, "loss": -0.0022, "step": 500 }, { "clip_ratio/high_max": 0.0018456240541127045, "clip_ratio/high_mean": 0.0007374058586719912, "clip_ratio/low_mean": 0.000812286967629916, "clip_ratio/low_min": 4.583981535688508e-05, "clip_ratio/region_mean": 0.0015496928681386635, "epoch": 5.1959183673469385, "grad_norm": 0.12784874439239502, "learning_rate": 5e-07, "loss": 0.0206, "step": 501 }, { "clip_ratio/high_max": 0.002066002372885123, "clip_ratio/high_mean": 0.0008040987140702782, "clip_ratio/low_mean": 0.0008565694697608706, "clip_ratio/low_min": 5.681757738784654e-05, "clip_ratio/region_mean": 0.0016606682183919474, "epoch": 5.205247813411079, "grad_norm": 0.1350013166666031, "learning_rate": 5e-07, "loss": 0.0089, "step": 502 }, { "clip_ratio/high_max": 0.0021843631111551076, "clip_ratio/high_mean": 0.0009551645744068082, "clip_ratio/low_mean": 0.0008249053626059322, "clip_ratio/low_min": 2.730450069066137e-05, "clip_ratio/region_mean": 0.001780069953383645, "epoch": 5.214577259475218, "grad_norm": 0.1382187306880951, "learning_rate": 5e-07, "loss": -0.0483, "step": 503 }, { "clip_ratio/high_max": 0.0019721593598660547, "clip_ratio/high_mean": 0.0007745533093839185, "clip_ratio/low_mean": 0.0010443954151924117, "clip_ratio/low_min": 5.2648165365098976e-05, "clip_ratio/region_mean": 0.00181894877096056, "epoch": 5.223906705539359, "grad_norm": 0.120248943567276, "learning_rate": 5e-07, "loss": 0.0402, "step": 504 }, { "clip_ratio/high_max": 0.001960013174539199, "clip_ratio/high_mean": 0.000744179018511204, "clip_ratio/low_mean": 0.0007854207360651344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001529599743662402, "epoch": 5.233236151603498, "grad_norm": 0.10076788812875748, "learning_rate": 5e-07, "loss": -0.0213, "step": 505 }, { "clip_ratio/high_max": 0.0020479266640904825, "clip_ratio/high_mean": 0.000769593141740188, "clip_ratio/low_mean": 0.0009359454397781519, "clip_ratio/low_min": 9.104684068006463e-05, "clip_ratio/region_mean": 0.001705538590613287, "epoch": 5.242565597667639, "grad_norm": 0.11219361424446106, "learning_rate": 5e-07, "loss": 0.0304, "step": 506 }, { "clip_ratio/high_max": 0.0019024582506972365, "clip_ratio/high_mean": 0.0007195085818239022, "clip_ratio/low_mean": 0.0009310593941336265, "clip_ratio/low_min": 0.00011185333642060868, "clip_ratio/region_mean": 0.001650567977776518, "epoch": 5.251895043731778, "grad_norm": 0.12195122241973877, "learning_rate": 5e-07, "loss": 0.026, "step": 507 }, { "clip_ratio/high_max": 0.0018863191071432084, "clip_ratio/high_mean": 0.0008335168622579658, "clip_ratio/low_mean": 0.0008388433798245387, "clip_ratio/low_min": 7.837392331566662e-05, "clip_ratio/region_mean": 0.0016723602675483562, "epoch": 5.261224489795918, "grad_norm": 0.1267365664243698, "learning_rate": 5e-07, "loss": -0.0229, "step": 508 }, { "clip_ratio/high_max": 0.002199983755417634, "clip_ratio/high_mean": 0.0008958636626630323, "clip_ratio/low_mean": 0.0008177479612641037, "clip_ratio/low_min": 2.8533463591884356e-05, "clip_ratio/region_mean": 0.0017136115857283585, "epoch": 5.270553935860058, "grad_norm": 0.13057278096675873, "learning_rate": 5e-07, "loss": -0.0389, "step": 509 }, { "clip_ratio/high_max": 0.0019591986856539734, "clip_ratio/high_mean": 0.0008255781813204521, "clip_ratio/low_mean": 0.0008566299638914643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001682208152487874, "epoch": 5.279883381924198, "grad_norm": 0.11141727864742279, "learning_rate": 5e-07, "loss": -0.003, "step": 510 }, { "clip_ratio/high_max": 0.0017905569020513212, "clip_ratio/high_mean": 0.0007902456288775284, "clip_ratio/low_mean": 0.0008679087586642709, "clip_ratio/low_min": 2.649418092914857e-05, "clip_ratio/region_mean": 0.0016581544186919928, "epoch": 5.289212827988338, "grad_norm": 0.1174594983458519, "learning_rate": 5e-07, "loss": 0.0132, "step": 511 }, { "clip_ratio/high_max": 0.0018171828050981276, "clip_ratio/high_mean": 0.0008148354554577963, "clip_ratio/low_mean": 0.0008082895692496095, "clip_ratio/low_min": 2.7448397304397076e-05, "clip_ratio/region_mean": 0.0016231249974225648, "epoch": 5.298542274052478, "grad_norm": 0.1260427087545395, "learning_rate": 5e-07, "loss": -0.0272, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0238560267857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 656.0258178710938, "completions/mean_terminated_length": 571.9561767578125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 5.307871720116618, "grad_norm": 0.12293235212564468, "learning_rate": 5e-07, "loss": 0.0408, "num_tokens": 320203525.0, "reward": 0.5856584906578064, "reward_std": 0.1836635321378708, "rewards/simpleverify_reward/mean": 0.5856584906578064, "rewards/simpleverify_reward/std": 0.49261659383773804, "step": 513 }, { "clip_ratio/high_max": 0.0019642410552478395, "clip_ratio/high_mean": 0.0007682883169763954, "clip_ratio/low_mean": 0.0005577765050475136, "clip_ratio/low_min": 2.5463900783506688e-05, "clip_ratio/region_mean": 0.001326064837485319, "epoch": 5.317201166180758, "grad_norm": 0.11803028732538223, "learning_rate": 5e-07, "loss": -0.0016, "step": 514 }, { "clip_ratio/high_max": 0.0017833998681453522, "clip_ratio/high_mean": 0.0008229149298131233, "clip_ratio/low_mean": 0.0005711965804948704, "clip_ratio/low_min": 1.3957123883301392e-05, "clip_ratio/region_mean": 0.001394111484842142, "epoch": 5.326530612244898, "grad_norm": 0.12738896906375885, "learning_rate": 5e-07, "loss": -0.0225, "step": 515 }, { "clip_ratio/high_max": 0.001949180423252983, "clip_ratio/high_mean": 0.0006973346025915816, "clip_ratio/low_mean": 0.0005385087019931234, "clip_ratio/low_min": 1.710454307612963e-05, "clip_ratio/region_mean": 0.0012358432832115795, "epoch": 5.335860058309038, "grad_norm": 0.13244758546352386, "learning_rate": 5e-07, "loss": 0.0284, "step": 516 }, { "clip_ratio/high_max": 0.002177193258830812, "clip_ratio/high_mean": 0.0009168542965198867, "clip_ratio/low_mean": 0.0005118728349771118, "clip_ratio/low_min": 3.7508702007471584e-05, "clip_ratio/region_mean": 0.0014287271405919455, "epoch": 5.345189504373177, "grad_norm": 0.1344238519668579, "learning_rate": 5e-07, "loss": -0.0516, "step": 517 }, { "clip_ratio/high_max": 0.0019214810599805787, "clip_ratio/high_mean": 0.0007409544869005913, "clip_ratio/low_mean": 0.0006163562193250982, "clip_ratio/low_min": 8.87367750692647e-05, "clip_ratio/region_mean": 0.00135731073532952, "epoch": 5.354518950437318, "grad_norm": 0.12372617423534393, "learning_rate": 5e-07, "loss": 0.0496, "step": 518 }, { "clip_ratio/high_max": 0.0018768218433251604, "clip_ratio/high_mean": 0.0007655978115508333, "clip_ratio/low_mean": 0.0004913290722470265, "clip_ratio/low_min": 1.190249440696789e-05, "clip_ratio/region_mean": 0.0012569268910738174, "epoch": 5.363848396501457, "grad_norm": 0.12876088917255402, "learning_rate": 5e-07, "loss": -0.0167, "step": 519 }, { "clip_ratio/high_max": 0.001699558153632097, "clip_ratio/high_mean": 0.0006902454497321742, "clip_ratio/low_mean": 0.0006121850765339332, "clip_ratio/low_min": 1.3083525118418038e-05, "clip_ratio/region_mean": 0.0013024305189901497, "epoch": 5.373177842565598, "grad_norm": 0.12782542407512665, "learning_rate": 5e-07, "loss": 0.0181, "step": 520 }, { "clip_ratio/high_max": 0.001871196465799585, "clip_ratio/high_mean": 0.0007965621334733441, "clip_ratio/low_mean": 0.0005917618263993063, "clip_ratio/low_min": 2.893056171160424e-05, "clip_ratio/region_mean": 0.0013883239516871981, "epoch": 5.382507288629737, "grad_norm": 0.12284059077501297, "learning_rate": 5e-07, "loss": -0.0082, "step": 521 }, { "clip_ratio/high_max": 0.002033664073678665, "clip_ratio/high_mean": 0.0008054644185904181, "clip_ratio/low_mean": 0.0005627255122817587, "clip_ratio/low_min": 2.2893773348187096e-05, "clip_ratio/region_mean": 0.0013681899436051026, "epoch": 5.391836734693878, "grad_norm": 0.12365730106830597, "learning_rate": 5e-07, "loss": -0.0068, "step": 522 }, { "clip_ratio/high_max": 0.0017699455784168094, "clip_ratio/high_mean": 0.000789091154729249, "clip_ratio/low_mean": 0.0007234800505102612, "clip_ratio/low_min": 8.228000115195755e-05, "clip_ratio/region_mean": 0.0015125712379813194, "epoch": 5.401166180758017, "grad_norm": 0.12097049504518509, "learning_rate": 5e-07, "loss": 0.0223, "step": 523 }, { "clip_ratio/high_max": 0.0018543958285590634, "clip_ratio/high_mean": 0.000751684880015091, "clip_ratio/low_mean": 0.00060665618366329, "clip_ratio/low_min": 9.966564721253235e-05, "clip_ratio/region_mean": 0.0013583410691353492, "epoch": 5.410495626822158, "grad_norm": 0.12303454428911209, "learning_rate": 5e-07, "loss": -0.0162, "step": 524 }, { "clip_ratio/high_max": 0.001665137053350918, "clip_ratio/high_mean": 0.0007217788224807009, "clip_ratio/low_mean": 0.0007369023715000367, "clip_ratio/low_min": 3.574903166736476e-05, "clip_ratio/region_mean": 0.0014586812103516422, "epoch": 5.419825072886297, "grad_norm": 0.11272701621055603, "learning_rate": 5e-07, "loss": 0.0087, "step": 525 }, { "clip_ratio/high_max": 0.002492884454113664, "clip_ratio/high_mean": 0.0008377370977541432, "clip_ratio/low_mean": 0.0006963856067159213, "clip_ratio/low_min": 1.92307688848814e-05, "clip_ratio/region_mean": 0.001534122715384001, "epoch": 5.429154518950437, "grad_norm": 0.1215175911784172, "learning_rate": 5e-07, "loss": -0.0054, "step": 526 }, { "clip_ratio/high_max": 0.0019127663545077667, "clip_ratio/high_mean": 0.0007522906216763658, "clip_ratio/low_mean": 0.0006988623517827364, "clip_ratio/low_min": 8.226722820836585e-05, "clip_ratio/region_mean": 0.0014511529843730386, "epoch": 5.438483965014577, "grad_norm": 0.12053031474351883, "learning_rate": 5e-07, "loss": 0.0154, "step": 527 }, { "clip_ratio/high_max": 0.0021414435395854525, "clip_ratio/high_mean": 0.0008441325626336038, "clip_ratio/low_mean": 0.0006807974868934252, "clip_ratio/low_min": 1.5060240912134759e-05, "clip_ratio/region_mean": 0.0015249300304276403, "epoch": 5.447813411078717, "grad_norm": 0.12031996995210648, "learning_rate": 5e-07, "loss": 0.0026, "step": 528 }, { "clip_ratio/high_max": 0.002175284724216908, "clip_ratio/high_mean": 0.0008716531938262051, "clip_ratio/low_mean": 0.0007416726475639734, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016133258104673587, "epoch": 5.457142857142857, "grad_norm": 0.1330840289592743, "learning_rate": 5e-07, "loss": 0.0064, "step": 529 }, { "clip_ratio/high_max": 0.001879553237813525, "clip_ratio/high_mean": 0.0008289276956929825, "clip_ratio/low_mean": 0.0006666787830909016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001495606469688937, "epoch": 5.466472303206997, "grad_norm": 0.1261214315891266, "learning_rate": 5e-07, "loss": -0.0186, "step": 530 }, { "clip_ratio/high_max": 0.0020207956586091314, "clip_ratio/high_mean": 0.000762684296205407, "clip_ratio/low_mean": 0.0006962353127164533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001458919625292765, "epoch": 5.475801749271137, "grad_norm": 0.11765404790639877, "learning_rate": 5e-07, "loss": 0.0015, "step": 531 }, { "clip_ratio/high_max": 0.0020067860168637708, "clip_ratio/high_mean": 0.000801641264843056, "clip_ratio/low_mean": 0.0006482761709776241, "clip_ratio/low_min": 2.5265570911869872e-05, "clip_ratio/region_mean": 0.0014499174358206801, "epoch": 5.485131195335277, "grad_norm": 0.12586121261119843, "learning_rate": 5e-07, "loss": 0.005, "step": 532 }, { "clip_ratio/high_max": 0.0019560561995604075, "clip_ratio/high_mean": 0.0007940435934870038, "clip_ratio/low_mean": 0.0009121457860601367, "clip_ratio/low_min": 0.0001065513361027115, "clip_ratio/region_mean": 0.001706189417745918, "epoch": 5.494460641399417, "grad_norm": 0.11965102702379227, "learning_rate": 5e-07, "loss": 0.0114, "step": 533 }, { "clip_ratio/high_max": 0.002280754422827158, "clip_ratio/high_mean": 0.0009250249313481618, "clip_ratio/low_mean": 0.0008283258423489315, "clip_ratio/low_min": 1.6382700778194703e-05, "clip_ratio/region_mean": 0.0017533507270854898, "epoch": 5.503790087463557, "grad_norm": 0.12290887534618378, "learning_rate": 5e-07, "loss": 0.0247, "step": 534 }, { "clip_ratio/high_max": 0.0018036572109849658, "clip_ratio/high_mean": 0.0007371420078925439, "clip_ratio/low_mean": 0.0006236534236450098, "clip_ratio/low_min": 9.180375855066814e-06, "clip_ratio/region_mean": 0.0013607954097096808, "epoch": 5.513119533527696, "grad_norm": 0.12702606618404388, "learning_rate": 5e-07, "loss": -0.0366, "step": 535 }, { "clip_ratio/high_max": 0.0021112746035214514, "clip_ratio/high_mean": 0.000810953712061746, "clip_ratio/low_mean": 0.0008088535014394438, "clip_ratio/low_min": 3.45375756296562e-05, "clip_ratio/region_mean": 0.0016198071971302852, "epoch": 5.522448979591837, "grad_norm": 0.11984357237815857, "learning_rate": 5e-07, "loss": 0.0142, "step": 536 }, { "clip_ratio/high_max": 0.0023610934549651574, "clip_ratio/high_mean": 0.0009637845687393565, "clip_ratio/low_mean": 0.0008263869240181521, "clip_ratio/low_min": 8.839275869831908e-05, "clip_ratio/region_mean": 0.0017901714891195297, "epoch": 5.531778425655976, "grad_norm": 0.11964539438486099, "learning_rate": 5e-07, "loss": -0.0287, "step": 537 }, { "clip_ratio/high_max": 0.0022820798549219035, "clip_ratio/high_mean": 0.0009540870123601053, "clip_ratio/low_mean": 0.000794811332525569, "clip_ratio/low_min": 9.936993956216611e-05, "clip_ratio/region_mean": 0.0017488983576186001, "epoch": 5.541107871720117, "grad_norm": 0.1394490897655487, "learning_rate": 5e-07, "loss": -0.0266, "step": 538 }, { "clip_ratio/high_max": 0.0020352073406684212, "clip_ratio/high_mean": 0.0008240972510975553, "clip_ratio/low_mean": 0.0008569014426029753, "clip_ratio/low_min": 0.00012755031275446527, "clip_ratio/region_mean": 0.0016809987209853716, "epoch": 5.550437317784256, "grad_norm": 0.13986925780773163, "learning_rate": 5e-07, "loss": 0.0376, "step": 539 }, { "clip_ratio/high_max": 0.001966376326890895, "clip_ratio/high_mean": 0.0008069754348980496, "clip_ratio/low_mean": 0.000777008226577891, "clip_ratio/low_min": 2.663947088876739e-05, "clip_ratio/region_mean": 0.0015839836851228029, "epoch": 5.559766763848397, "grad_norm": 0.12896405160427094, "learning_rate": 5e-07, "loss": -0.0054, "step": 540 }, { "clip_ratio/high_max": 0.0022013274574419484, "clip_ratio/high_mean": 0.0009247100770153338, "clip_ratio/low_mean": 0.0008004844748938922, "clip_ratio/low_min": 3.5849605410476215e-05, "clip_ratio/region_mean": 0.0017251945464522578, "epoch": 5.569096209912536, "grad_norm": 0.12394282221794128, "learning_rate": 5e-07, "loss": -0.0133, "step": 541 }, { "clip_ratio/high_max": 0.0023509712700615637, "clip_ratio/high_mean": 0.000918090037885122, "clip_ratio/low_mean": 0.0008322380126628559, "clip_ratio/low_min": 6.732651127094869e-05, "clip_ratio/region_mean": 0.001750328046909999, "epoch": 5.578425655976677, "grad_norm": 0.13442008197307587, "learning_rate": 5e-07, "loss": -0.007, "step": 542 }, { "clip_ratio/high_max": 0.002015518519328907, "clip_ratio/high_mean": 0.0008090567971521523, "clip_ratio/low_mean": 0.0008081086562015116, "clip_ratio/low_min": 3.3134266232082155e-05, "clip_ratio/region_mean": 0.001617165431525791, "epoch": 5.587755102040816, "grad_norm": 0.11133599281311035, "learning_rate": 5e-07, "loss": 0.0052, "step": 543 }, { "clip_ratio/high_max": 0.00199684091785457, "clip_ratio/high_mean": 0.0008705170002940577, "clip_ratio/low_mean": 0.0007722383452346548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016427553637186065, "epoch": 5.597084548104956, "grad_norm": 0.12899178266525269, "learning_rate": 5e-07, "loss": -0.0021, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021379743303571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 643.1791381835938, "completions/mean_terminated_length": 567.7460327148438, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 5.606413994169096, "grad_norm": 0.1334053874015808, "learning_rate": 5e-07, "loss": 0.0248, "num_tokens": 339011830.0, "reward": 0.607735812664032, "reward_std": 0.18788620829582214, "rewards/simpleverify_reward/mean": 0.6077357530593872, "rewards/simpleverify_reward/std": 0.48826363682746887, "step": 545 }, { "clip_ratio/high_max": 0.002082285347569268, "clip_ratio/high_mean": 0.0008271069200418424, "clip_ratio/low_mean": 0.0005989291730656987, "clip_ratio/low_min": 6.233880867512198e-05, "clip_ratio/region_mean": 0.0014260361131164245, "epoch": 5.615743440233236, "grad_norm": 0.12878137826919556, "learning_rate": 5e-07, "loss": -0.0041, "step": 546 }, { "clip_ratio/high_max": 0.0021064399770693853, "clip_ratio/high_mean": 0.0008432689373876201, "clip_ratio/low_mean": 0.0004755636036861688, "clip_ratio/low_min": 1.055030406860169e-05, "clip_ratio/region_mean": 0.0013188325428927783, "epoch": 5.625072886297376, "grad_norm": 0.12197627127170563, "learning_rate": 5e-07, "loss": -0.0108, "step": 547 }, { "clip_ratio/high_max": 0.0019448067978373729, "clip_ratio/high_mean": 0.0008356680009455886, "clip_ratio/low_mean": 0.0005614524452539627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013971204389235936, "epoch": 5.634402332361516, "grad_norm": 0.12095198780298233, "learning_rate": 5e-07, "loss": 0.0051, "step": 548 }, { "clip_ratio/high_max": 0.00202586296654772, "clip_ratio/high_mean": 0.0009351628305012127, "clip_ratio/low_mean": 0.000511776260282204, "clip_ratio/low_min": 1.4175549949868582e-05, "clip_ratio/region_mean": 0.0014469391098828055, "epoch": 5.643731778425656, "grad_norm": 0.12609301507472992, "learning_rate": 5e-07, "loss": -0.0191, "step": 549 }, { "clip_ratio/high_max": 0.002126500694430433, "clip_ratio/high_mean": 0.0008177285799320089, "clip_ratio/low_mean": 0.0005604938478427357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013782224268652499, "epoch": 5.653061224489796, "grad_norm": 0.13720718026161194, "learning_rate": 5e-07, "loss": 0.0112, "step": 550 }, { "clip_ratio/high_max": 0.002298442217579577, "clip_ratio/high_mean": 0.0009363967437820975, "clip_ratio/low_mean": 0.000512019758389215, "clip_ratio/low_min": 4.508861093199812e-05, "clip_ratio/region_mean": 0.0014484165149042383, "epoch": 5.662390670553936, "grad_norm": 0.12918448448181152, "learning_rate": 5e-07, "loss": -0.022, "step": 551 }, { "clip_ratio/high_max": 0.0018744200424407609, "clip_ratio/high_mean": 0.0008680203573021572, "clip_ratio/low_mean": 0.0006800329447287368, "clip_ratio/low_min": 4.3776078200608026e-05, "clip_ratio/region_mean": 0.0015480532892979681, "epoch": 5.671720116618076, "grad_norm": 0.14416968822479248, "learning_rate": 5e-07, "loss": -0.0409, "step": 552 }, { "clip_ratio/high_max": 0.0016729363251215545, "clip_ratio/high_mean": 0.000668589410452114, "clip_ratio/low_mean": 0.0005515138100236072, "clip_ratio/low_min": 1.2525049896794371e-05, "clip_ratio/region_mean": 0.0012201032113807742, "epoch": 5.681049562682215, "grad_norm": 0.12379064410924911, "learning_rate": 5e-07, "loss": 0.0159, "step": 553 }, { "clip_ratio/high_max": 0.002131918889062945, "clip_ratio/high_mean": 0.0008921394310164033, "clip_ratio/low_mean": 0.0006339939554891316, "clip_ratio/low_min": 2.248333657917101e-05, "clip_ratio/region_mean": 0.0015261333792295773, "epoch": 5.690379008746356, "grad_norm": 0.12850970029830933, "learning_rate": 5e-07, "loss": 0.0078, "step": 554 }, { "clip_ratio/high_max": 0.001965381634363439, "clip_ratio/high_mean": 0.0007535275617556181, "clip_ratio/low_mean": 0.0006170835458760848, "clip_ratio/low_min": 2.720487827900797e-05, "clip_ratio/region_mean": 0.0013706110876228195, "epoch": 5.699708454810495, "grad_norm": 0.12541022896766663, "learning_rate": 5e-07, "loss": 0.0014, "step": 555 }, { "clip_ratio/high_max": 0.0019625873428594787, "clip_ratio/high_mean": 0.0007719148397882236, "clip_ratio/low_mean": 0.0005710573568649124, "clip_ratio/low_min": 1.394466744386591e-05, "clip_ratio/region_mean": 0.0013429722093860619, "epoch": 5.709037900874636, "grad_norm": 0.11864602565765381, "learning_rate": 5e-07, "loss": 0.0012, "step": 556 }, { "clip_ratio/high_max": 0.0019669919347506948, "clip_ratio/high_mean": 0.0008284589966933709, "clip_ratio/low_mean": 0.0007269833804457448, "clip_ratio/low_min": 4.1211512325389776e-05, "clip_ratio/region_mean": 0.001555442409880925, "epoch": 5.718367346938775, "grad_norm": 0.14233919978141785, "learning_rate": 5e-07, "loss": 0.0212, "step": 557 }, { "clip_ratio/high_max": 0.0019074160372838378, "clip_ratio/high_mean": 0.0007899888823885703, "clip_ratio/low_mean": 0.0008022165802685777, "clip_ratio/low_min": 3.542239574017003e-05, "clip_ratio/region_mean": 0.0015922054371912964, "epoch": 5.727696793002916, "grad_norm": 0.12786468863487244, "learning_rate": 5e-07, "loss": 0.0132, "step": 558 }, { "clip_ratio/high_max": 0.0020632306150218938, "clip_ratio/high_mean": 0.0008989738216769183, "clip_ratio/low_mean": 0.0006796833440603223, "clip_ratio/low_min": 6.319540170807159e-05, "clip_ratio/region_mean": 0.0015786572112119757, "epoch": 5.737026239067055, "grad_norm": 0.1301458328962326, "learning_rate": 5e-07, "loss": -0.0282, "step": 559 }, { "clip_ratio/high_max": 0.002001629356527701, "clip_ratio/high_mean": 0.0007838844448997406, "clip_ratio/low_mean": 0.0008018044682103209, "clip_ratio/low_min": 1.5625000742147677e-05, "clip_ratio/region_mean": 0.0015856889003771357, "epoch": 5.746355685131196, "grad_norm": 0.12945948541164398, "learning_rate": 5e-07, "loss": 0.0112, "step": 560 }, { "clip_ratio/high_max": 0.0020644906908273697, "clip_ratio/high_mean": 0.0008466862727800617, "clip_ratio/low_mean": 0.0007468975782103371, "clip_ratio/low_min": 6.771781409042887e-05, "clip_ratio/region_mean": 0.001593583838257473, "epoch": 5.755685131195335, "grad_norm": 0.1158437430858612, "learning_rate": 5e-07, "loss": -0.0133, "step": 561 }, { "clip_ratio/high_max": 0.0021100183585076593, "clip_ratio/high_mean": 0.000936092488700524, "clip_ratio/low_mean": 0.0005457105280584074, "clip_ratio/low_min": 1.2376237464195583e-05, "clip_ratio/region_mean": 0.0014818030249443837, "epoch": 5.765014577259475, "grad_norm": 0.11970330029726028, "learning_rate": 5e-07, "loss": -0.0465, "step": 562 }, { "clip_ratio/high_max": 0.0019257608510088176, "clip_ratio/high_mean": 0.0008103340278466931, "clip_ratio/low_mean": 0.0007599779219162883, "clip_ratio/low_min": 2.7336020139046013e-05, "clip_ratio/region_mean": 0.001570311938849045, "epoch": 5.774344023323615, "grad_norm": 0.11428262293338776, "learning_rate": 5e-07, "loss": 0.0023, "step": 563 }, { "clip_ratio/high_max": 0.002150091495423112, "clip_ratio/high_mean": 0.000926574593904661, "clip_ratio/low_mean": 0.0008002605845831567, "clip_ratio/low_min": 3.8179598050192e-05, "clip_ratio/region_mean": 0.0017268351803068072, "epoch": 5.783673469387755, "grad_norm": 0.14955191314220428, "learning_rate": 5e-07, "loss": 0.0017, "step": 564 }, { "clip_ratio/high_max": 0.002129578009771649, "clip_ratio/high_mean": 0.0009245159781130496, "clip_ratio/low_mean": 0.0007263678598974366, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.001650883841648465, "epoch": 5.793002915451895, "grad_norm": 0.1254526525735855, "learning_rate": 5e-07, "loss": -0.0165, "step": 565 }, { "clip_ratio/high_max": 0.002075710530334618, "clip_ratio/high_mean": 0.000882328073203098, "clip_ratio/low_mean": 0.0009206483136949828, "clip_ratio/low_min": 7.807379279256565e-05, "clip_ratio/region_mean": 0.0018029763450613245, "epoch": 5.802332361516035, "grad_norm": 0.12574465572834015, "learning_rate": 5e-07, "loss": 0.0133, "step": 566 }, { "clip_ratio/high_max": 0.0018510844602133147, "clip_ratio/high_mean": 0.000719018762538326, "clip_ratio/low_mean": 0.000952836900978582, "clip_ratio/low_min": 0.00010179191940551391, "clip_ratio/region_mean": 0.001671855672611855, "epoch": 5.811661807580175, "grad_norm": 0.13247837126255035, "learning_rate": 5e-07, "loss": 0.0303, "step": 567 }, { "clip_ratio/high_max": 0.001916762143082451, "clip_ratio/high_mean": 0.000881643120010267, "clip_ratio/low_mean": 0.0008812985252006911, "clip_ratio/low_min": 4.465679285203805e-05, "clip_ratio/region_mean": 0.0017629416106501594, "epoch": 5.820991253644315, "grad_norm": 0.12308746576309204, "learning_rate": 5e-07, "loss": 0.0104, "step": 568 }, { "clip_ratio/high_max": 0.0017924950552696828, "clip_ratio/high_mean": 0.0007072198241075967, "clip_ratio/low_mean": 0.0009605758714315016, "clip_ratio/low_min": 6.895816113683395e-05, "clip_ratio/region_mean": 0.0016677956664352678, "epoch": 5.830320699708455, "grad_norm": 0.12457672506570816, "learning_rate": 5e-07, "loss": 0.0129, "step": 569 }, { "clip_ratio/high_max": 0.0018222620892629493, "clip_ratio/high_mean": 0.0008277566867036512, "clip_ratio/low_mean": 0.000962347872700775, "clip_ratio/low_min": 0.00015187933149718447, "clip_ratio/region_mean": 0.0017901045212056488, "epoch": 5.839650145772595, "grad_norm": 0.1268325299024582, "learning_rate": 5e-07, "loss": -0.0079, "step": 570 }, { "clip_ratio/high_max": 0.002201547489676159, "clip_ratio/high_mean": 0.0008737643129279604, "clip_ratio/low_mean": 0.000829363324555743, "clip_ratio/low_min": 1.2517524737631902e-05, "clip_ratio/region_mean": 0.0017031276547641028, "epoch": 5.848979591836734, "grad_norm": 0.1315494179725647, "learning_rate": 5e-07, "loss": 0.0122, "step": 571 }, { "clip_ratio/high_max": 0.001926430210005492, "clip_ratio/high_mean": 0.0008959148690337315, "clip_ratio/low_mean": 0.000988169087577262, "clip_ratio/low_min": 3.964641746279085e-05, "clip_ratio/region_mean": 0.0018840839838958345, "epoch": 5.858309037900875, "grad_norm": 0.13211916387081146, "learning_rate": 5e-07, "loss": 0.02, "step": 572 }, { "clip_ratio/high_max": 0.001945608397363685, "clip_ratio/high_mean": 0.0007810929964762181, "clip_ratio/low_mean": 0.0007782667544233846, "clip_ratio/low_min": 1.1991557585133705e-05, "clip_ratio/region_mean": 0.0015593597490806133, "epoch": 5.867638483965014, "grad_norm": 0.17761482298374176, "learning_rate": 5e-07, "loss": 0.0012, "step": 573 }, { "clip_ratio/high_max": 0.001812844377127476, "clip_ratio/high_mean": 0.0007903923942649271, "clip_ratio/low_mean": 0.0009243618114851415, "clip_ratio/low_min": 0.0001205757662319229, "clip_ratio/region_mean": 0.0017147541511803865, "epoch": 5.876967930029155, "grad_norm": 0.12732607126235962, "learning_rate": 5e-07, "loss": 0.0136, "step": 574 }, { "clip_ratio/high_max": 0.0018337654910283163, "clip_ratio/high_mean": 0.0008626730850664899, "clip_ratio/low_mean": 0.0008401763170695631, "clip_ratio/low_min": 7.54908596718451e-05, "clip_ratio/region_mean": 0.001702849411231, "epoch": 5.886297376093294, "grad_norm": 0.12095412611961365, "learning_rate": 5e-07, "loss": 0.0011, "step": 575 }, { "clip_ratio/high_max": 0.0017253766491194256, "clip_ratio/high_mean": 0.0007474166213796707, "clip_ratio/low_mean": 0.0009507301474513952, "clip_ratio/low_min": 7.89962750786799e-05, "clip_ratio/region_mean": 0.00169814675609814, "epoch": 5.895626822157435, "grad_norm": 0.12744082510471344, "learning_rate": 5e-07, "loss": 0.0253, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021763392857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 643.8881225585938, "completions/mean_terminated_length": 567.0869750976562, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 6.0093294460641395, "grad_norm": 0.1240546777844429, "learning_rate": 5e-07, "loss": 0.0209, "num_tokens": 357786717.0, "reward": 0.599190890789032, "reward_std": 0.18380965292453766, "rewards/simpleverify_reward/mean": 0.5991908311843872, "rewards/simpleverify_reward/std": 0.49007099866867065, "step": 577 }, { "clip_ratio/high_max": 0.0019288507064629812, "clip_ratio/high_mean": 0.0006993368278926937, "clip_ratio/low_mean": 0.0005906488813707256, "clip_ratio/low_min": 2.5207466023857705e-05, "clip_ratio/region_mean": 0.0012899857356387656, "epoch": 6.01865889212828, "grad_norm": 0.12858697772026062, "learning_rate": 5e-07, "loss": 0.0434, "step": 578 }, { "clip_ratio/high_max": 0.0020339748480182607, "clip_ratio/high_mean": 0.000830766606668476, "clip_ratio/low_mean": 0.0005601760576610104, "clip_ratio/low_min": 9.372720342071261e-05, "clip_ratio/region_mean": 0.001390942659782013, "epoch": 6.0279883381924195, "grad_norm": 0.11733090877532959, "learning_rate": 5e-07, "loss": -0.0002, "step": 579 }, { "clip_ratio/high_max": 0.0017265329806832597, "clip_ratio/high_mean": 0.00071573884360987, "clip_ratio/low_mean": 0.0006066355672373902, "clip_ratio/low_min": 6.977505108807236e-05, "clip_ratio/region_mean": 0.0013223744099377654, "epoch": 6.03731778425656, "grad_norm": 0.11561767756938934, "learning_rate": 5e-07, "loss": 0.0123, "step": 580 }, { "clip_ratio/high_max": 0.0018308641665498726, "clip_ratio/high_mean": 0.0007963118841871619, "clip_ratio/low_mean": 0.000653477713058237, "clip_ratio/low_min": 3.8838994441903196e-05, "clip_ratio/region_mean": 0.00144978959360742, "epoch": 6.0466472303206995, "grad_norm": 0.11772079765796661, "learning_rate": 5e-07, "loss": 0.0305, "step": 581 }, { "clip_ratio/high_max": 0.0020497745063039474, "clip_ratio/high_mean": 0.0009292997729062336, "clip_ratio/low_mean": 0.0006093227475503227, "clip_ratio/low_min": 2.5591111807443667e-05, "clip_ratio/region_mean": 0.0015386225168185774, "epoch": 6.05597667638484, "grad_norm": 0.13216020166873932, "learning_rate": 5e-07, "loss": 0.0108, "step": 582 }, { "clip_ratio/high_max": 0.002020219777477905, "clip_ratio/high_mean": 0.0008030597637116443, "clip_ratio/low_mean": 0.0005577401661867043, "clip_ratio/low_min": 4.927720146952197e-05, "clip_ratio/region_mean": 0.001360799931717338, "epoch": 6.0653061224489795, "grad_norm": 0.13742752373218536, "learning_rate": 5e-07, "loss": -0.0249, "step": 583 }, { "clip_ratio/high_max": 0.0020720607644761913, "clip_ratio/high_mean": 0.0008472062345390441, "clip_ratio/low_mean": 0.0006595534414373105, "clip_ratio/low_min": 1.0154346455237828e-05, "clip_ratio/region_mean": 0.0015067596832523122, "epoch": 6.07463556851312, "grad_norm": 0.14109601080417633, "learning_rate": 5e-07, "loss": 0.0152, "step": 584 }, { "clip_ratio/high_max": 0.0021033204029663466, "clip_ratio/high_mean": 0.0008688256784807891, "clip_ratio/low_mean": 0.0005672771021636436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014361027606355492, "epoch": 6.0839650145772595, "grad_norm": 0.13856996595859528, "learning_rate": 5e-07, "loss": -0.0046, "step": 585 }, { "clip_ratio/high_max": 0.001962129106686916, "clip_ratio/high_mean": 0.0007715577230555937, "clip_ratio/low_mean": 0.0006947034780750982, "clip_ratio/low_min": 6.137995296739973e-05, "clip_ratio/region_mean": 0.0014662612084066495, "epoch": 6.093294460641399, "grad_norm": 0.12950178980827332, "learning_rate": 5e-07, "loss": 0.0065, "step": 586 }, { "clip_ratio/high_max": 0.0022203073676791973, "clip_ratio/high_mean": 0.0007632698252564296, "clip_ratio/low_mean": 0.0007270013156812638, "clip_ratio/low_min": 1.7179769201902673e-05, "clip_ratio/region_mean": 0.0014902711554896086, "epoch": 6.1026239067055394, "grad_norm": 0.133062481880188, "learning_rate": 5e-07, "loss": 0.055, "step": 587 }, { "clip_ratio/high_max": 0.002155910144210793, "clip_ratio/high_mean": 0.0008982488652691245, "clip_ratio/low_mean": 0.0007370712282863678, "clip_ratio/low_min": 2.718270661716815e-05, "clip_ratio/region_mean": 0.0016353200990124606, "epoch": 6.111953352769679, "grad_norm": 0.5321926474571228, "learning_rate": 5e-07, "loss": -0.0037, "step": 588 }, { "clip_ratio/high_max": 0.0021711986846639775, "clip_ratio/high_mean": 0.0008582033915445209, "clip_ratio/low_mean": 0.0006169808739286964, "clip_ratio/low_min": 2.3408239940181375e-05, "clip_ratio/region_mean": 0.0014751842827536166, "epoch": 6.121282798833819, "grad_norm": 0.12114585936069489, "learning_rate": 5e-07, "loss": -0.0331, "step": 589 }, { "clip_ratio/high_max": 0.0017421764678147156, "clip_ratio/high_mean": 0.0007882188638177468, "clip_ratio/low_mean": 0.0006427077405533055, "clip_ratio/low_min": 6.011578898323933e-05, "clip_ratio/region_mean": 0.0014309266043710522, "epoch": 6.130612244897959, "grad_norm": 0.13131296634674072, "learning_rate": 5e-07, "loss": 0.0019, "step": 590 }, { "clip_ratio/high_max": 0.002034262754023075, "clip_ratio/high_mean": 0.0008134372237691423, "clip_ratio/low_mean": 0.0007664469194423873, "clip_ratio/low_min": 3.238836052332772e-05, "clip_ratio/region_mean": 0.0015798841704963706, "epoch": 6.139941690962099, "grad_norm": 0.11746270954608917, "learning_rate": 5e-07, "loss": 0.0082, "step": 591 }, { "clip_ratio/high_max": 0.0022255271978792734, "clip_ratio/high_mean": 0.000920559457881609, "clip_ratio/low_mean": 0.0006800666342314798, "clip_ratio/low_min": 2.8747068427037448e-05, "clip_ratio/region_mean": 0.001600626070285216, "epoch": 6.149271137026239, "grad_norm": 0.12486302852630615, "learning_rate": 5e-07, "loss": -0.022, "step": 592 }, { "clip_ratio/high_max": 0.001798354962375015, "clip_ratio/high_mean": 0.0007254130214278121, "clip_ratio/low_mean": 0.0009105404133151751, "clip_ratio/low_min": 7.259883750521112e-05, "clip_ratio/region_mean": 0.0016359534420189448, "epoch": 6.158600583090379, "grad_norm": 0.12696833908557892, "learning_rate": 5e-07, "loss": 0.0258, "step": 593 }, { "clip_ratio/high_max": 0.0022116872132755816, "clip_ratio/high_mean": 0.0008981417704490013, "clip_ratio/low_mean": 0.0008961927869677311, "clip_ratio/low_min": 2.7062134904554114e-05, "clip_ratio/region_mean": 0.001794334573787637, "epoch": 6.167930029154519, "grad_norm": 0.14565309882164001, "learning_rate": 5e-07, "loss": 0.012, "step": 594 }, { "clip_ratio/high_max": 0.0018149144962080754, "clip_ratio/high_mean": 0.0007721517267782474, "clip_ratio/low_mean": 0.0008653445456729969, "clip_ratio/low_min": 2.9785544029437006e-05, "clip_ratio/region_mean": 0.0016374962542613503, "epoch": 6.1772594752186585, "grad_norm": 0.1348152607679367, "learning_rate": 5e-07, "loss": 0.0143, "step": 595 }, { "clip_ratio/high_max": 0.0021065445253043436, "clip_ratio/high_mean": 0.0007751333687338047, "clip_ratio/low_mean": 0.0007833029740140773, "clip_ratio/low_min": 4.946258559357375e-05, "clip_ratio/region_mean": 0.0015584363827656489, "epoch": 6.186588921282799, "grad_norm": 0.12767454981803894, "learning_rate": 5e-07, "loss": -0.0082, "step": 596 }, { "clip_ratio/high_max": 0.0019292397737444844, "clip_ratio/high_mean": 0.000763574562370195, "clip_ratio/low_mean": 0.0009623011919757118, "clip_ratio/low_min": 3.167842987750191e-05, "clip_ratio/region_mean": 0.0017258757870877162, "epoch": 6.1959183673469385, "grad_norm": 0.1246240958571434, "learning_rate": 5e-07, "loss": 0.0144, "step": 597 }, { "clip_ratio/high_max": 0.0023632031043234747, "clip_ratio/high_mean": 0.0009562783643559669, "clip_ratio/low_mean": 0.0007371502961177612, "clip_ratio/low_min": 1.803751729312353e-05, "clip_ratio/region_mean": 0.0016934286541072652, "epoch": 6.205247813411079, "grad_norm": 0.12627720832824707, "learning_rate": 5e-07, "loss": -0.0173, "step": 598 }, { "clip_ratio/high_max": 0.002241402311483398, "clip_ratio/high_mean": 0.0009644538567954442, "clip_ratio/low_mean": 0.0009206840823026141, "clip_ratio/low_min": 6.294649028859567e-05, "clip_ratio/region_mean": 0.0018851379281841218, "epoch": 6.214577259475218, "grad_norm": 0.13178157806396484, "learning_rate": 5e-07, "loss": -0.0191, "step": 599 }, { "clip_ratio/high_max": 0.0020922125768265687, "clip_ratio/high_mean": 0.0008822112358757295, "clip_ratio/low_mean": 0.00072404793354508, "clip_ratio/low_min": 3.192032818333246e-05, "clip_ratio/region_mean": 0.00160625916760182, "epoch": 6.223906705539359, "grad_norm": 0.13679754734039307, "learning_rate": 5e-07, "loss": 0.0059, "step": 600 }, { "clip_ratio/high_max": 0.0019580993066483643, "clip_ratio/high_mean": 0.0007753465106361546, "clip_ratio/low_mean": 0.0009113547457673121, "clip_ratio/low_min": 0.00012446032178559108, "clip_ratio/region_mean": 0.001686701267317403, "epoch": 6.233236151603498, "grad_norm": 0.13213829696178436, "learning_rate": 5e-07, "loss": 0.0125, "step": 601 }, { "clip_ratio/high_max": 0.0018463375490682665, "clip_ratio/high_mean": 0.0008813036929495865, "clip_ratio/low_mean": 0.0007943682594486745, "clip_ratio/low_min": 9.722930462885415e-05, "clip_ratio/region_mean": 0.0016756719560362399, "epoch": 6.242565597667639, "grad_norm": 0.12222902476787567, "learning_rate": 5e-07, "loss": -0.0241, "step": 602 }, { "clip_ratio/high_max": 0.002086294312903192, "clip_ratio/high_mean": 0.0008550438087695511, "clip_ratio/low_mean": 0.0008065389883995522, "clip_ratio/low_min": 1.7443482647649944e-05, "clip_ratio/region_mean": 0.0016615827989880927, "epoch": 6.251895043731778, "grad_norm": 0.1464071124792099, "learning_rate": 5e-07, "loss": -0.0227, "step": 603 }, { "clip_ratio/high_max": 0.0017502287737443112, "clip_ratio/high_mean": 0.0007493883240385912, "clip_ratio/low_mean": 0.0008640018522783066, "clip_ratio/low_min": 0.00011995881868642755, "clip_ratio/region_mean": 0.0016133901590364985, "epoch": 6.261224489795918, "grad_norm": 0.12265537679195404, "learning_rate": 5e-07, "loss": 0.041, "step": 604 }, { "clip_ratio/high_max": 0.0017414499088772573, "clip_ratio/high_mean": 0.0008416582713834941, "clip_ratio/low_mean": 0.0008619346062914701, "clip_ratio/low_min": 1.326963865722064e-05, "clip_ratio/region_mean": 0.0017035928394761868, "epoch": 6.270553935860058, "grad_norm": 0.12494516372680664, "learning_rate": 5e-07, "loss": -0.0358, "step": 605 }, { "clip_ratio/high_max": 0.0017651302805461455, "clip_ratio/high_mean": 0.0008082658405328402, "clip_ratio/low_mean": 0.0009009808854898438, "clip_ratio/low_min": 4.676976823247969e-05, "clip_ratio/region_mean": 0.001709246716927737, "epoch": 6.279883381924198, "grad_norm": 0.13563041388988495, "learning_rate": 5e-07, "loss": -0.0119, "step": 606 }, { "clip_ratio/high_max": 0.0018108887743437663, "clip_ratio/high_mean": 0.0007301503765120287, "clip_ratio/low_mean": 0.0008986912898762967, "clip_ratio/low_min": 9.890548972180113e-05, "clip_ratio/region_mean": 0.0016288416809402406, "epoch": 6.289212827988338, "grad_norm": 0.11872313171625137, "learning_rate": 5e-07, "loss": 0.0161, "step": 607 }, { "clip_ratio/high_max": 0.0019140814220008906, "clip_ratio/high_mean": 0.0007788016519043595, "clip_ratio/low_mean": 0.0008287906193800154, "clip_ratio/low_min": 4.036855170852505e-05, "clip_ratio/region_mean": 0.0016075922612799332, "epoch": 6.298542274052478, "grad_norm": 0.12449943274259567, "learning_rate": 5e-07, "loss": -0.0288, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024693080357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 663.795654296875, "completions/mean_terminated_length": 576.8981323242188, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 6.307871720116618, "grad_norm": 0.12391631305217743, "learning_rate": 5e-07, "loss": -0.0242, "num_tokens": 376752169.0, "reward": 0.6075265407562256, "reward_std": 0.18983885645866394, "rewards/simpleverify_reward/mean": 0.6075264811515808, "rewards/simpleverify_reward/std": 0.48830971121788025, "step": 609 }, { "clip_ratio/high_max": 0.002164737175917253, "clip_ratio/high_mean": 0.000863262790517183, "clip_ratio/low_mean": 0.000550651018784265, "clip_ratio/low_min": 2.4418832254013978e-05, "clip_ratio/region_mean": 0.001413913800206501, "epoch": 6.317201166180758, "grad_norm": 0.12810933589935303, "learning_rate": 5e-07, "loss": -0.0329, "step": 610 }, { "clip_ratio/high_max": 0.002007543109357357, "clip_ratio/high_mean": 0.0008242405729106395, "clip_ratio/low_mean": 0.0004806091237696819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013048496839473955, "epoch": 6.326530612244898, "grad_norm": 0.1360631138086319, "learning_rate": 5e-07, "loss": -0.0601, "step": 611 }, { "clip_ratio/high_max": 0.0023122205384424888, "clip_ratio/high_mean": 0.0009304102877649711, "clip_ratio/low_mean": 0.0004913843158647069, "clip_ratio/low_min": 2.724143269006163e-05, "clip_ratio/region_mean": 0.0014217945899872575, "epoch": 6.335860058309038, "grad_norm": 0.15040592849254608, "learning_rate": 5e-07, "loss": -0.0461, "step": 612 }, { "clip_ratio/high_max": 0.0019607354697654955, "clip_ratio/high_mean": 0.0007957539583003381, "clip_ratio/low_mean": 0.0006008436648698989, "clip_ratio/low_min": 1.5504838302149437e-05, "clip_ratio/region_mean": 0.0013965975813334808, "epoch": 6.345189504373177, "grad_norm": 0.13224826753139496, "learning_rate": 5e-07, "loss": 0.0106, "step": 613 }, { "clip_ratio/high_max": 0.0018643069779500365, "clip_ratio/high_mean": 0.0007679275186092127, "clip_ratio/low_mean": 0.0005390251953940606, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.0013069526830804534, "epoch": 6.354518950437318, "grad_norm": 0.1205330491065979, "learning_rate": 5e-07, "loss": -0.0154, "step": 614 }, { "clip_ratio/high_max": 0.0019402025936869904, "clip_ratio/high_mean": 0.00085068223233975, "clip_ratio/low_mean": 0.0006270395169849508, "clip_ratio/low_min": 2.8859128178737592e-05, "clip_ratio/region_mean": 0.0014777217511436902, "epoch": 6.363848396501457, "grad_norm": 0.12947741150856018, "learning_rate": 5e-07, "loss": 0.0189, "step": 615 }, { "clip_ratio/high_max": 0.001797153858206002, "clip_ratio/high_mean": 0.000728042890841607, "clip_ratio/low_mean": 0.0007132006448955508, "clip_ratio/low_min": 3.212681895092828e-05, "clip_ratio/region_mean": 0.0014412435339181684, "epoch": 6.373177842565598, "grad_norm": 0.1301521360874176, "learning_rate": 5e-07, "loss": 0.0393, "step": 616 }, { "clip_ratio/high_max": 0.0018344468990107998, "clip_ratio/high_mean": 0.0007630111649632454, "clip_ratio/low_mean": 0.0006203354732861044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013833466364303604, "epoch": 6.382507288629737, "grad_norm": 0.13540413975715637, "learning_rate": 5e-07, "loss": 0.0235, "step": 617 }, { "clip_ratio/high_max": 0.0015159371250774711, "clip_ratio/high_mean": 0.0006734544676874066, "clip_ratio/low_mean": 0.000775581047491869, "clip_ratio/low_min": 0.00011374256064300425, "clip_ratio/region_mean": 0.001449035498808371, "epoch": 6.391836734693878, "grad_norm": 0.12207652628421783, "learning_rate": 5e-07, "loss": 0.0337, "step": 618 }, { "clip_ratio/high_max": 0.0019371653761481866, "clip_ratio/high_mean": 0.0008665852528793039, "clip_ratio/low_mean": 0.0006181513126648497, "clip_ratio/low_min": 1.2862728908658028e-05, "clip_ratio/region_mean": 0.0014847365455352701, "epoch": 6.401166180758017, "grad_norm": 0.13582007586956024, "learning_rate": 5e-07, "loss": 0.029, "step": 619 }, { "clip_ratio/high_max": 0.002047131485596765, "clip_ratio/high_mean": 0.000796926664406783, "clip_ratio/low_mean": 0.000660545324535633, "clip_ratio/low_min": 2.939133901236346e-05, "clip_ratio/region_mean": 0.0014574719898519106, "epoch": 6.410495626822158, "grad_norm": 0.14896313846111298, "learning_rate": 5e-07, "loss": 0.0445, "step": 620 }, { "clip_ratio/high_max": 0.001920396687637549, "clip_ratio/high_mean": 0.0007732257618044969, "clip_ratio/low_mean": 0.0005883845697098877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013616103569802362, "epoch": 6.419825072886297, "grad_norm": 0.1246793195605278, "learning_rate": 5e-07, "loss": -0.008, "step": 621 }, { "clip_ratio/high_max": 0.0022427561998483725, "clip_ratio/high_mean": 0.0008872181206243113, "clip_ratio/low_mean": 0.000585610767302569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014728289170307107, "epoch": 6.429154518950437, "grad_norm": 0.1255161315202713, "learning_rate": 5e-07, "loss": -0.0319, "step": 622 }, { "clip_ratio/high_max": 0.0021527102289837785, "clip_ratio/high_mean": 0.000786772565334104, "clip_ratio/low_mean": 0.0007851552418287611, "clip_ratio/low_min": 1.198236168420408e-05, "clip_ratio/region_mean": 0.0015719278017058969, "epoch": 6.438483965014577, "grad_norm": 0.12915243208408356, "learning_rate": 5e-07, "loss": 0.0123, "step": 623 }, { "clip_ratio/high_max": 0.0017485297976236325, "clip_ratio/high_mean": 0.0007437639505951665, "clip_ratio/low_mean": 0.0007700011628912762, "clip_ratio/low_min": 4.0074218304653186e-05, "clip_ratio/region_mean": 0.001513765164418146, "epoch": 6.447813411078717, "grad_norm": 0.11543770879507065, "learning_rate": 5e-07, "loss": 0.0181, "step": 624 }, { "clip_ratio/high_max": 0.0019031587980862241, "clip_ratio/high_mean": 0.0007592465881316457, "clip_ratio/low_mean": 0.00083565718182399, "clip_ratio/low_min": 0.00010934722922684159, "clip_ratio/region_mean": 0.0015949038279359229, "epoch": 6.457142857142857, "grad_norm": 0.12982146441936493, "learning_rate": 5e-07, "loss": -0.0101, "step": 625 }, { "clip_ratio/high_max": 0.0018080958252539858, "clip_ratio/high_mean": 0.0008148578781401739, "clip_ratio/low_mean": 0.0008722468264750205, "clip_ratio/low_min": 0.0001273871512239566, "clip_ratio/region_mean": 0.0016871047409949824, "epoch": 6.466472303206997, "grad_norm": 0.126524418592453, "learning_rate": 5e-07, "loss": 0.0336, "step": 626 }, { "clip_ratio/high_max": 0.0021108994806127157, "clip_ratio/high_mean": 0.0008345699443452759, "clip_ratio/low_mean": 0.000729350178517052, "clip_ratio/low_min": 4.3960635593975894e-05, "clip_ratio/region_mean": 0.0015639201301382855, "epoch": 6.475801749271137, "grad_norm": 0.12444368749856949, "learning_rate": 5e-07, "loss": -0.0317, "step": 627 }, { "clip_ratio/high_max": 0.0017748605387168936, "clip_ratio/high_mean": 0.0007705798598180991, "clip_ratio/low_mean": 0.0007752097553748172, "clip_ratio/low_min": 4.572304078465095e-05, "clip_ratio/region_mean": 0.0015457895788131282, "epoch": 6.485131195335277, "grad_norm": 0.1258181780576706, "learning_rate": 5e-07, "loss": 0.0021, "step": 628 }, { "clip_ratio/high_max": 0.0024410089245066047, "clip_ratio/high_mean": 0.000962962643825449, "clip_ratio/low_mean": 0.0007777102046020445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00174067284388002, "epoch": 6.494460641399417, "grad_norm": 0.13542871177196503, "learning_rate": 5e-07, "loss": -0.0068, "step": 629 }, { "clip_ratio/high_max": 0.002140998258255422, "clip_ratio/high_mean": 0.0008727475724299438, "clip_ratio/low_mean": 0.0009177964693662943, "clip_ratio/low_min": 7.353146884270245e-05, "clip_ratio/region_mean": 0.0017905440981849097, "epoch": 6.503790087463557, "grad_norm": 0.1396140456199646, "learning_rate": 5e-07, "loss": 0.0277, "step": 630 }, { "clip_ratio/high_max": 0.0017724180652294308, "clip_ratio/high_mean": 0.0008263159670605091, "clip_ratio/low_mean": 0.0009014896168082487, "clip_ratio/low_min": 0.0001285899006688851, "clip_ratio/region_mean": 0.0017278055747738108, "epoch": 6.513119533527696, "grad_norm": 0.12419480830430984, "learning_rate": 5e-07, "loss": 0.0265, "step": 631 }, { "clip_ratio/high_max": 0.0023796225286787376, "clip_ratio/high_mean": 0.0009804708170122467, "clip_ratio/low_mean": 0.0007715492265560897, "clip_ratio/low_min": 6.684051732008811e-05, "clip_ratio/region_mean": 0.001752020078129135, "epoch": 6.522448979591837, "grad_norm": 0.1238010972738266, "learning_rate": 5e-07, "loss": -0.0436, "step": 632 }, { "clip_ratio/high_max": 0.0020069348138349596, "clip_ratio/high_mean": 0.0007953845288284356, "clip_ratio/low_mean": 0.000868357166837086, "clip_ratio/low_min": 2.7187547857465688e-05, "clip_ratio/region_mean": 0.0016637416774756275, "epoch": 6.531778425655976, "grad_norm": 0.12352171540260315, "learning_rate": 5e-07, "loss": -0.0185, "step": 633 }, { "clip_ratio/high_max": 0.0019405056809773669, "clip_ratio/high_mean": 0.0008982107901829295, "clip_ratio/low_mean": 0.0008084897926892154, "clip_ratio/low_min": 3.3985614209086634e-05, "clip_ratio/region_mean": 0.0017067005828721449, "epoch": 6.541107871720117, "grad_norm": 0.13162550330162048, "learning_rate": 5e-07, "loss": -0.0165, "step": 634 }, { "clip_ratio/high_max": 0.0021864785812795162, "clip_ratio/high_mean": 0.0007891833515714097, "clip_ratio/low_mean": 0.0009232933552993927, "clip_ratio/low_min": 0.000131419968965929, "clip_ratio/region_mean": 0.0017124767182394862, "epoch": 6.550437317784256, "grad_norm": 0.1299324482679367, "learning_rate": 5e-07, "loss": 0.0318, "step": 635 }, { "clip_ratio/high_max": 0.0020354634179966524, "clip_ratio/high_mean": 0.0007695901695115026, "clip_ratio/low_mean": 0.0009213795037794625, "clip_ratio/low_min": 9.366568338009529e-05, "clip_ratio/region_mean": 0.0016909696787479334, "epoch": 6.559766763848397, "grad_norm": 0.1285596340894699, "learning_rate": 5e-07, "loss": 0.027, "step": 636 }, { "clip_ratio/high_max": 0.002234992411104031, "clip_ratio/high_mean": 0.0008878835942596197, "clip_ratio/low_mean": 0.0009892329180729575, "clip_ratio/low_min": 0.000127838580738171, "clip_ratio/region_mean": 0.0018771165268844925, "epoch": 6.569096209912536, "grad_norm": 0.12402414530515671, "learning_rate": 5e-07, "loss": 0.0147, "step": 637 }, { "clip_ratio/high_max": 0.002005862635996891, "clip_ratio/high_mean": 0.0008266402765002567, "clip_ratio/low_mean": 0.0008438839595328318, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001670524216024205, "epoch": 6.578425655976677, "grad_norm": 0.1273399293422699, "learning_rate": 5e-07, "loss": 0.0112, "step": 638 }, { "clip_ratio/high_max": 0.002205679840699304, "clip_ratio/high_mean": 0.0008493245259160176, "clip_ratio/low_mean": 0.00097341624496039, "clip_ratio/low_min": 6.372926873154938e-05, "clip_ratio/region_mean": 0.0018227408363600262, "epoch": 6.587755102040816, "grad_norm": 0.13730767369270325, "learning_rate": 5e-07, "loss": -0.005, "step": 639 }, { "clip_ratio/high_max": 0.0018926870725408662, "clip_ratio/high_mean": 0.0008652521773910848, "clip_ratio/low_mean": 0.0008401349205087172, "clip_ratio/low_min": 1.618332498765085e-05, "clip_ratio/region_mean": 0.0017053871306416113, "epoch": 6.597084548104956, "grad_norm": 0.13321101665496826, "learning_rate": 5e-07, "loss": -0.0151, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026750837053571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 666.4493408203125, "completions/mean_terminated_length": 572.1842651367188, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 6.606413994169096, "grad_norm": 0.12682510912418365, "learning_rate": 5e-07, "loss": 0.0329, "num_tokens": 395622819.0, "reward": 0.5947614908218384, "reward_std": 0.18294788897037506, "rewards/simpleverify_reward/mean": 0.5947614312171936, "rewards/simpleverify_reward/std": 0.4909466803073883, "step": 641 }, { "clip_ratio/high_max": 0.0017097203599405475, "clip_ratio/high_mean": 0.0007306676543521462, "clip_ratio/low_mean": 0.0005955111755611142, "clip_ratio/low_min": 3.781073883146746e-05, "clip_ratio/region_mean": 0.0013261788117233664, "epoch": 6.615743440233236, "grad_norm": 0.15034013986587524, "learning_rate": 5e-07, "loss": 0.0393, "step": 642 }, { "clip_ratio/high_max": 0.001865597820142284, "clip_ratio/high_mean": 0.0007539129092037911, "clip_ratio/low_mean": 0.0005488001115736552, "clip_ratio/low_min": 1.6819160009617917e-05, "clip_ratio/region_mean": 0.0013027130007685628, "epoch": 6.625072886297376, "grad_norm": 0.12359079718589783, "learning_rate": 5e-07, "loss": 0.0011, "step": 643 }, { "clip_ratio/high_max": 0.0017728767670632806, "clip_ratio/high_mean": 0.0007504938821512042, "clip_ratio/low_mean": 0.0004887302056886256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012392240860208403, "epoch": 6.634402332361516, "grad_norm": 0.12299814820289612, "learning_rate": 5e-07, "loss": -0.0372, "step": 644 }, { "clip_ratio/high_max": 0.001864318925072439, "clip_ratio/high_mean": 0.000755319790187059, "clip_ratio/low_mean": 0.0005620813062705565, "clip_ratio/low_min": 9.993604180635884e-06, "clip_ratio/region_mean": 0.0013174011364753824, "epoch": 6.643731778425656, "grad_norm": 0.1252467781305313, "learning_rate": 5e-07, "loss": -0.0106, "step": 645 }, { "clip_ratio/high_max": 0.0018720185216807295, "clip_ratio/high_mean": 0.0007069225821396685, "clip_ratio/low_mean": 0.0005938241574767744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013007466986891814, "epoch": 6.653061224489796, "grad_norm": 0.12251404672861099, "learning_rate": 5e-07, "loss": -0.0105, "step": 646 }, { "clip_ratio/high_max": 0.0017521338049846236, "clip_ratio/high_mean": 0.0007177228726504836, "clip_ratio/low_mean": 0.0005579804073931882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001275703267310746, "epoch": 6.662390670553936, "grad_norm": 0.12762346863746643, "learning_rate": 5e-07, "loss": -0.0179, "step": 647 }, { "clip_ratio/high_max": 0.002127338455466088, "clip_ratio/high_mean": 0.0007988872730493313, "clip_ratio/low_mean": 0.0006362796448229346, "clip_ratio/low_min": 3.973394359491067e-05, "clip_ratio/region_mean": 0.0014351669342431705, "epoch": 6.671720116618076, "grad_norm": 0.13345035910606384, "learning_rate": 5e-07, "loss": -0.0401, "step": 648 }, { "clip_ratio/high_max": 0.0018436394675518386, "clip_ratio/high_mean": 0.0007415657728415681, "clip_ratio/low_mean": 0.0007150134642870398, "clip_ratio/low_min": 4.192221058474388e-05, "clip_ratio/region_mean": 0.001456579237128608, "epoch": 6.681049562682215, "grad_norm": 0.1288323551416397, "learning_rate": 5e-07, "loss": 0.0271, "step": 649 }, { "clip_ratio/high_max": 0.0019227436278015375, "clip_ratio/high_mean": 0.0008165699491655687, "clip_ratio/low_mean": 0.0006149926230136771, "clip_ratio/low_min": 3.63585850209347e-05, "clip_ratio/region_mean": 0.0014315625448944047, "epoch": 6.690379008746356, "grad_norm": 0.11926820129156113, "learning_rate": 5e-07, "loss": -0.0088, "step": 650 }, { "clip_ratio/high_max": 0.001647152384975925, "clip_ratio/high_mean": 0.0006641282361670164, "clip_ratio/low_mean": 0.000611556527474022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001275684768188512, "epoch": 6.699708454810495, "grad_norm": 0.11461739242076874, "learning_rate": 5e-07, "loss": 0.0001, "step": 651 }, { "clip_ratio/high_max": 0.0021867606410523877, "clip_ratio/high_mean": 0.0008891593406588072, "clip_ratio/low_mean": 0.0006475283407780807, "clip_ratio/low_min": 4.2252599087078124e-05, "clip_ratio/region_mean": 0.0015366876759799197, "epoch": 6.709037900874636, "grad_norm": 0.13303445279598236, "learning_rate": 5e-07, "loss": -0.0248, "step": 652 }, { "clip_ratio/high_max": 0.00195182754760026, "clip_ratio/high_mean": 0.0007318146326724673, "clip_ratio/low_mean": 0.0008135922980727628, "clip_ratio/low_min": 3.9501233914052136e-05, "clip_ratio/region_mean": 0.0015454069107363466, "epoch": 6.718367346938775, "grad_norm": 0.13776570558547974, "learning_rate": 5e-07, "loss": 0.0051, "step": 653 }, { "clip_ratio/high_max": 0.0021534457555389963, "clip_ratio/high_mean": 0.000850376478410908, "clip_ratio/low_mean": 0.0007336505623243283, "clip_ratio/low_min": 8.350498410436558e-05, "clip_ratio/region_mean": 0.0015840270352782682, "epoch": 6.727696793002916, "grad_norm": 0.13401465117931366, "learning_rate": 5e-07, "loss": -0.0487, "step": 654 }, { "clip_ratio/high_max": 0.0017451550338591915, "clip_ratio/high_mean": 0.0007330738317250507, "clip_ratio/low_mean": 0.0007537239052908262, "clip_ratio/low_min": 5.039843472331995e-05, "clip_ratio/region_mean": 0.0014867977224639617, "epoch": 6.737026239067055, "grad_norm": 0.1262640506029129, "learning_rate": 5e-07, "loss": 0.0167, "step": 655 }, { "clip_ratio/high_max": 0.002069927955744788, "clip_ratio/high_mean": 0.0008802115062280791, "clip_ratio/low_mean": 0.0006403436200344004, "clip_ratio/low_min": 1.6979081919998862e-05, "clip_ratio/region_mean": 0.00152055512444349, "epoch": 6.746355685131196, "grad_norm": 0.1366174966096878, "learning_rate": 5e-07, "loss": -0.0065, "step": 656 }, { "clip_ratio/high_max": 0.002157247276045382, "clip_ratio/high_mean": 0.0008158000800904119, "clip_ratio/low_mean": 0.0007251312254084041, "clip_ratio/low_min": 3.80064138880698e-05, "clip_ratio/region_mean": 0.0015409312945848797, "epoch": 6.755685131195335, "grad_norm": 0.14236469566822052, "learning_rate": 5e-07, "loss": -0.0228, "step": 657 }, { "clip_ratio/high_max": 0.0020100157053093426, "clip_ratio/high_mean": 0.0008628317655166029, "clip_ratio/low_mean": 0.0008380287308682455, "clip_ratio/low_min": 1.9367833374417387e-05, "clip_ratio/region_mean": 0.0017008605136652477, "epoch": 6.765014577259475, "grad_norm": 0.14630308747291565, "learning_rate": 5e-07, "loss": -0.0117, "step": 658 }, { "clip_ratio/high_max": 0.0021461696924234275, "clip_ratio/high_mean": 0.0009063107663678238, "clip_ratio/low_mean": 0.000667810374579858, "clip_ratio/low_min": 1.565239108458627e-05, "clip_ratio/region_mean": 0.0015741211318527348, "epoch": 6.774344023323615, "grad_norm": 0.13020429015159607, "learning_rate": 5e-07, "loss": -0.0403, "step": 659 }, { "clip_ratio/high_max": 0.0019788531135418452, "clip_ratio/high_mean": 0.0008370651157747488, "clip_ratio/low_mean": 0.0007984128023963422, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016354779072571546, "epoch": 6.783673469387755, "grad_norm": 0.11887270212173462, "learning_rate": 5e-07, "loss": -0.0193, "step": 660 }, { "clip_ratio/high_max": 0.0020594361776602454, "clip_ratio/high_mean": 0.0007827744511814672, "clip_ratio/low_mean": 0.0008419817368121585, "clip_ratio/low_min": 9.531699288345408e-05, "clip_ratio/region_mean": 0.0016247562089120038, "epoch": 6.793002915451895, "grad_norm": 0.13123011589050293, "learning_rate": 5e-07, "loss": 0.0307, "step": 661 }, { "clip_ratio/high_max": 0.0019191451247024816, "clip_ratio/high_mean": 0.0007889309054007754, "clip_ratio/low_mean": 0.0007412565755657852, "clip_ratio/low_min": 1.2831041203753557e-05, "clip_ratio/region_mean": 0.001530187473690603, "epoch": 6.802332361516035, "grad_norm": 0.11874429881572723, "learning_rate": 5e-07, "loss": 0.0088, "step": 662 }, { "clip_ratio/high_max": 0.0018722589047683869, "clip_ratio/high_mean": 0.0007456223647750448, "clip_ratio/low_mean": 0.0008120618149405345, "clip_ratio/low_min": 2.578797375463182e-05, "clip_ratio/region_mean": 0.001557684205181431, "epoch": 6.811661807580175, "grad_norm": 0.14248572289943695, "learning_rate": 5e-07, "loss": 0.027, "step": 663 }, { "clip_ratio/high_max": 0.0019879621777363354, "clip_ratio/high_mean": 0.0007899657966845552, "clip_ratio/low_mean": 0.0007048066972856759, "clip_ratio/low_min": 2.7144407795276493e-05, "clip_ratio/region_mean": 0.0014947724739613477, "epoch": 6.820991253644315, "grad_norm": 0.1346021443605423, "learning_rate": 5e-07, "loss": -0.0247, "step": 664 }, { "clip_ratio/high_max": 0.0017473559455538634, "clip_ratio/high_mean": 0.0006589427775907097, "clip_ratio/low_mean": 0.0008222144952014787, "clip_ratio/low_min": 2.0421067347342614e-05, "clip_ratio/region_mean": 0.0014811572582402732, "epoch": 6.830320699708455, "grad_norm": 0.12528717517852783, "learning_rate": 5e-07, "loss": 0.0093, "step": 665 }, { "clip_ratio/high_max": 0.0015211064419418108, "clip_ratio/high_mean": 0.000644093204755336, "clip_ratio/low_mean": 0.0008172985253622755, "clip_ratio/low_min": 4.0744464058661833e-05, "clip_ratio/region_mean": 0.00146139170465176, "epoch": 6.839650145772595, "grad_norm": 0.12892603874206543, "learning_rate": 5e-07, "loss": -0.0216, "step": 666 }, { "clip_ratio/high_max": 0.0018581941476440988, "clip_ratio/high_mean": 0.000747740879887715, "clip_ratio/low_mean": 0.000757420013542287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015051608788780868, "epoch": 6.848979591836734, "grad_norm": 0.11524396389722824, "learning_rate": 5e-07, "loss": -0.0193, "step": 667 }, { "clip_ratio/high_max": 0.001999178966798354, "clip_ratio/high_mean": 0.0008045838385442039, "clip_ratio/low_mean": 0.0009233754772139946, "clip_ratio/low_min": 6.65328170725843e-05, "clip_ratio/region_mean": 0.0017279593303101137, "epoch": 6.858309037900875, "grad_norm": 0.11849892139434814, "learning_rate": 5e-07, "loss": -0.0026, "step": 668 }, { "clip_ratio/high_max": 0.002090760761348065, "clip_ratio/high_mean": 0.00083374572750472, "clip_ratio/low_mean": 0.0009831268289417494, "clip_ratio/low_min": 9.494435380474897e-05, "clip_ratio/region_mean": 0.0018168725800933316, "epoch": 6.867638483965014, "grad_norm": 0.13012216985225677, "learning_rate": 5e-07, "loss": 0.0303, "step": 669 }, { "clip_ratio/high_max": 0.0020459956758713815, "clip_ratio/high_mean": 0.0008998116372822551, "clip_ratio/low_mean": 0.0008943041975726373, "clip_ratio/low_min": 5.284286726237042e-05, "clip_ratio/region_mean": 0.0017941158002940938, "epoch": 6.876967930029155, "grad_norm": 0.12167589366436005, "learning_rate": 5e-07, "loss": 0.0365, "step": 670 }, { "clip_ratio/high_max": 0.0021419746590254363, "clip_ratio/high_mean": 0.0007948726906761294, "clip_ratio/low_mean": 0.0007363279200944817, "clip_ratio/low_min": 3.502054460113868e-05, "clip_ratio/region_mean": 0.0015312005816667806, "epoch": 6.886297376093294, "grad_norm": 0.12781858444213867, "learning_rate": 5e-07, "loss": 0.045, "step": 671 }, { "clip_ratio/high_max": 0.0017812872829381377, "clip_ratio/high_mean": 0.0007718562628724612, "clip_ratio/low_mean": 0.000878211327290046, "clip_ratio/low_min": 7.456514322257135e-05, "clip_ratio/region_mean": 0.0016500675701536238, "epoch": 6.895626822157435, "grad_norm": 0.123275525867939, "learning_rate": 5e-07, "loss": -0.0023, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0292271205357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 675.9100341796875, "completions/mean_terminated_length": 572.941162109375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 7.0093294460641395, "grad_norm": 0.12534502148628235, "learning_rate": 5e-07, "loss": 0.0127, "num_tokens": 414427176.0, "reward": 0.605294406414032, "reward_std": 0.1856737583875656, "rewards/simpleverify_reward/mean": 0.6052943468093872, "rewards/simpleverify_reward/std": 0.4887958765029907, "step": 673 }, { "clip_ratio/high_max": 0.0018060463298752438, "clip_ratio/high_mean": 0.0007825705542927608, "clip_ratio/low_mean": 0.0006238487712835195, "clip_ratio/low_min": 2.2776968762627803e-05, "clip_ratio/region_mean": 0.0014064193310332485, "epoch": 7.01865889212828, "grad_norm": 0.14348982274532318, "learning_rate": 5e-07, "loss": -0.0236, "step": 674 }, { "clip_ratio/high_max": 0.0021049814531579614, "clip_ratio/high_mean": 0.0008666296780575067, "clip_ratio/low_mean": 0.0005583520478467108, "clip_ratio/low_min": 2.873527137126075e-05, "clip_ratio/region_mean": 0.0014249817249947228, "epoch": 7.0279883381924195, "grad_norm": 0.13903912901878357, "learning_rate": 5e-07, "loss": -0.038, "step": 675 }, { "clip_ratio/high_max": 0.0018520789089961909, "clip_ratio/high_mean": 0.0007423079041473102, "clip_ratio/low_mean": 0.0006279097297010594, "clip_ratio/low_min": 3.9743015804560855e-05, "clip_ratio/region_mean": 0.001370217611111002, "epoch": 7.03731778425656, "grad_norm": 0.14651916921138763, "learning_rate": 5e-07, "loss": 0.0178, "step": 676 }, { "clip_ratio/high_max": 0.0016853489032655489, "clip_ratio/high_mean": 0.0007432670099660754, "clip_ratio/low_mean": 0.0006708868495479692, "clip_ratio/low_min": 5.106454591441434e-05, "clip_ratio/region_mean": 0.001414153888617875, "epoch": 7.0466472303206995, "grad_norm": 0.358439177274704, "learning_rate": 5e-07, "loss": 0.0415, "step": 677 }, { "clip_ratio/high_max": 0.0019142288947477937, "clip_ratio/high_mean": 0.0007842663944757078, "clip_ratio/low_mean": 0.0006669486119790236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014512149973597843, "epoch": 7.05597667638484, "grad_norm": 0.14371873438358307, "learning_rate": 5e-07, "loss": -0.0084, "step": 678 }, { "clip_ratio/high_max": 0.001933225565153407, "clip_ratio/high_mean": 0.0007559258447145112, "clip_ratio/low_mean": 0.0006108708257670514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013667966704815626, "epoch": 7.0653061224489795, "grad_norm": 0.1289217621088028, "learning_rate": 5e-07, "loss": 0.0095, "step": 679 }, { "clip_ratio/high_max": 0.002080955142446328, "clip_ratio/high_mean": 0.0008451556350337341, "clip_ratio/low_mean": 0.0005567891766986577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014019448026374448, "epoch": 7.07463556851312, "grad_norm": 0.141281396150589, "learning_rate": 5e-07, "loss": -0.0412, "step": 680 }, { "clip_ratio/high_max": 0.002025420224526897, "clip_ratio/high_mean": 0.0008458852989861043, "clip_ratio/low_mean": 0.0006733232139595202, "clip_ratio/low_min": 4.3940753130300436e-05, "clip_ratio/region_mean": 0.001519208544777939, "epoch": 7.0839650145772595, "grad_norm": 0.13135038316249847, "learning_rate": 5e-07, "loss": -0.0097, "step": 681 }, { "clip_ratio/high_max": 0.0020815290372411255, "clip_ratio/high_mean": 0.0008961236453615129, "clip_ratio/low_mean": 0.0006618110091949347, "clip_ratio/low_min": 2.8203970941831358e-05, "clip_ratio/region_mean": 0.00155793464728049, "epoch": 7.093294460641399, "grad_norm": 0.13015587627887726, "learning_rate": 5e-07, "loss": -0.0116, "step": 682 }, { "clip_ratio/high_max": 0.0018461136205587536, "clip_ratio/high_mean": 0.0007707527529419167, "clip_ratio/low_mean": 0.0007036987262836192, "clip_ratio/low_min": 1.3224714166426565e-05, "clip_ratio/region_mean": 0.0014744514810445253, "epoch": 7.1026239067055394, "grad_norm": 0.12428536266088486, "learning_rate": 5e-07, "loss": 0.0369, "step": 683 }, { "clip_ratio/high_max": 0.002223650662926957, "clip_ratio/high_mean": 0.0009120087852352299, "clip_ratio/low_mean": 0.0006940751136426115, "clip_ratio/low_min": 3.202172229066491e-05, "clip_ratio/region_mean": 0.0016060839006968308, "epoch": 7.111953352769679, "grad_norm": 0.13048934936523438, "learning_rate": 5e-07, "loss": -0.0003, "step": 684 }, { "clip_ratio/high_max": 0.0022204177221283317, "clip_ratio/high_mean": 0.0008366691054106923, "clip_ratio/low_mean": 0.0007659495240659453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001602618649485521, "epoch": 7.121282798833819, "grad_norm": 0.12661120295524597, "learning_rate": 5e-07, "loss": 0.0001, "step": 685 }, { "clip_ratio/high_max": 0.0021382894919952378, "clip_ratio/high_mean": 0.0008470087541354587, "clip_ratio/low_mean": 0.0006969543819650426, "clip_ratio/low_min": 1.2007684745185543e-05, "clip_ratio/region_mean": 0.0015439631315530278, "epoch": 7.130612244897959, "grad_norm": 0.12664374709129333, "learning_rate": 5e-07, "loss": 0.0256, "step": 686 }, { "clip_ratio/high_max": 0.002017913884628797, "clip_ratio/high_mean": 0.0008802167267276673, "clip_ratio/low_mean": 0.0006056786951376125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014858954345982056, "epoch": 7.139941690962099, "grad_norm": 0.1273665577173233, "learning_rate": 5e-07, "loss": -0.0277, "step": 687 }, { "clip_ratio/high_max": 0.0022665519136353396, "clip_ratio/high_mean": 0.0008974677475634962, "clip_ratio/low_mean": 0.0006760666374248103, "clip_ratio/low_min": 2.973358641611412e-05, "clip_ratio/region_mean": 0.0015735343840788119, "epoch": 7.149271137026239, "grad_norm": 0.1310872584581375, "learning_rate": 5e-07, "loss": -0.0284, "step": 688 }, { "clip_ratio/high_max": 0.0019792217135545798, "clip_ratio/high_mean": 0.0008504235993314069, "clip_ratio/low_mean": 0.0007706450542173116, "clip_ratio/low_min": 4.9541167754796334e-05, "clip_ratio/region_mean": 0.0016210686444537714, "epoch": 7.158600583090379, "grad_norm": 0.13514217734336853, "learning_rate": 5e-07, "loss": 0.0091, "step": 689 }, { "clip_ratio/high_max": 0.0016976806728052907, "clip_ratio/high_mean": 0.0007497852693632012, "clip_ratio/low_mean": 0.000737709794066177, "clip_ratio/low_min": 4.1293046706414316e-05, "clip_ratio/region_mean": 0.001487495068431599, "epoch": 7.167930029154519, "grad_norm": 0.13161665201187134, "learning_rate": 5e-07, "loss": 0.0296, "step": 690 }, { "clip_ratio/high_max": 0.001969821019883966, "clip_ratio/high_mean": 0.0008262926785391755, "clip_ratio/low_mean": 0.0006930870931682875, "clip_ratio/low_min": 3.052799547731411e-05, "clip_ratio/region_mean": 0.0015193797662504949, "epoch": 7.1772594752186585, "grad_norm": 0.10706352442502975, "learning_rate": 5e-07, "loss": -0.0202, "step": 691 }, { "clip_ratio/high_max": 0.00186648074304685, "clip_ratio/high_mean": 0.0008621454362582881, "clip_ratio/low_mean": 0.000993132111034356, "clip_ratio/low_min": 7.738777003396535e-05, "clip_ratio/region_mean": 0.0018552775363787077, "epoch": 7.186588921282799, "grad_norm": 0.14446547627449036, "learning_rate": 5e-07, "loss": 0.0166, "step": 692 }, { "clip_ratio/high_max": 0.0021017963517806493, "clip_ratio/high_mean": 0.000905728815268958, "clip_ratio/low_mean": 0.0008935148744058097, "clip_ratio/low_min": 0.000131122583297838, "clip_ratio/region_mean": 0.0017992436987697147, "epoch": 7.1959183673469385, "grad_norm": 0.13494423031806946, "learning_rate": 5e-07, "loss": -0.0205, "step": 693 }, { "clip_ratio/high_max": 0.002161045827961061, "clip_ratio/high_mean": 0.0008258578018285334, "clip_ratio/low_mean": 0.0007012951227807207, "clip_ratio/low_min": 2.694201157282805e-05, "clip_ratio/region_mean": 0.0015271529337042011, "epoch": 7.205247813411079, "grad_norm": 0.12062602490186691, "learning_rate": 5e-07, "loss": -0.0043, "step": 694 }, { "clip_ratio/high_max": 0.002120134828146547, "clip_ratio/high_mean": 0.0008719401466805721, "clip_ratio/low_mean": 0.0009608884756744374, "clip_ratio/low_min": 7.327575804083608e-05, "clip_ratio/region_mean": 0.0018328286241739988, "epoch": 7.214577259475218, "grad_norm": 0.1359880268573761, "learning_rate": 5e-07, "loss": -0.0098, "step": 695 }, { "clip_ratio/high_max": 0.00207853003666969, "clip_ratio/high_mean": 0.0008603526384831639, "clip_ratio/low_mean": 0.000869955054440652, "clip_ratio/low_min": 3.305021527921781e-05, "clip_ratio/region_mean": 0.0017303077074757311, "epoch": 7.223906705539359, "grad_norm": 0.13957162201404572, "learning_rate": 5e-07, "loss": -0.0244, "step": 696 }, { "clip_ratio/high_max": 0.002146478080248926, "clip_ratio/high_mean": 0.0008471526525681838, "clip_ratio/low_mean": 0.0008939697709138272, "clip_ratio/low_min": 6.028671487001702e-05, "clip_ratio/region_mean": 0.0017411224398529157, "epoch": 7.233236151603498, "grad_norm": 0.13139434158802032, "learning_rate": 5e-07, "loss": 0.0241, "step": 697 }, { "clip_ratio/high_max": 0.0022057738606235944, "clip_ratio/high_mean": 0.0008729064411454601, "clip_ratio/low_mean": 0.0007799801660439698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016528865889995359, "epoch": 7.242565597667639, "grad_norm": 0.1554511934518814, "learning_rate": 5e-07, "loss": -0.0172, "step": 698 }, { "clip_ratio/high_max": 0.0018054961910820566, "clip_ratio/high_mean": 0.0007166699615481775, "clip_ratio/low_mean": 0.000795950250903843, "clip_ratio/low_min": 1.559381234983448e-05, "clip_ratio/region_mean": 0.0015126201979001053, "epoch": 7.251895043731778, "grad_norm": 0.1261553019285202, "learning_rate": 5e-07, "loss": 0.004, "step": 699 }, { "clip_ratio/high_max": 0.0019120169308735058, "clip_ratio/high_mean": 0.0008420069179919665, "clip_ratio/low_mean": 0.0008093960932455957, "clip_ratio/low_min": 1.056993096426595e-05, "clip_ratio/region_mean": 0.0016514029957761522, "epoch": 7.261224489795918, "grad_norm": 0.14786681532859802, "learning_rate": 5e-07, "loss": 0.0211, "step": 700 }, { "clip_ratio/high_max": 0.0018282951132277958, "clip_ratio/high_mean": 0.0008071758820733521, "clip_ratio/low_mean": 0.0008855878204485634, "clip_ratio/low_min": 4.837744108954212e-05, "clip_ratio/region_mean": 0.0016927636825130321, "epoch": 7.270553935860058, "grad_norm": 0.12272375822067261, "learning_rate": 5e-07, "loss": -0.0143, "step": 701 }, { "clip_ratio/high_max": 0.0016603879448666703, "clip_ratio/high_mean": 0.0007173052727011964, "clip_ratio/low_mean": 0.0009723803977976786, "clip_ratio/low_min": 0.00010682493848435115, "clip_ratio/region_mean": 0.001689685661403928, "epoch": 7.279883381924198, "grad_norm": 0.1420699954032898, "learning_rate": 5e-07, "loss": 0.0099, "step": 702 }, { "clip_ratio/high_max": 0.001925295167893637, "clip_ratio/high_mean": 0.0008351935848622816, "clip_ratio/low_mean": 0.0006899478375999024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015251414151862264, "epoch": 7.289212827988338, "grad_norm": 0.12215544283390045, "learning_rate": 5e-07, "loss": -0.032, "step": 703 }, { "clip_ratio/high_max": 0.0018904389362432994, "clip_ratio/high_mean": 0.0008024109883990604, "clip_ratio/low_mean": 0.0009188726271531777, "clip_ratio/low_min": 7.62834815759561e-05, "clip_ratio/region_mean": 0.0017212836391991004, "epoch": 7.298542274052478, "grad_norm": 0.12384752929210663, "learning_rate": 5e-07, "loss": 0.0058, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.027692522321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 675.99609375, "completions/mean_terminated_length": 578.5901489257812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.307871720116618, "grad_norm": 0.14446282386779785, "learning_rate": 5e-07, "loss": -0.0016, "num_tokens": 433399632.0, "reward": 0.6047014594078064, "reward_std": 0.18499179184436798, "rewards/simpleverify_reward/mean": 0.6047014594078064, "rewards/simpleverify_reward/std": 0.4889232814311981, "step": 705 }, { "clip_ratio/high_max": 0.0019454658176982775, "clip_ratio/high_mean": 0.0007549388392362744, "clip_ratio/low_mean": 0.0005456961334857624, "clip_ratio/low_min": 3.1377716368297115e-05, "clip_ratio/region_mean": 0.0013006349654460791, "epoch": 7.317201166180758, "grad_norm": 0.12797780334949493, "learning_rate": 5e-07, "loss": -0.0158, "step": 706 }, { "clip_ratio/high_max": 0.0020118865759286564, "clip_ratio/high_mean": 0.0007591765843244502, "clip_ratio/low_mean": 0.0005664901655109134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013256667225505225, "epoch": 7.326530612244898, "grad_norm": 0.13046789169311523, "learning_rate": 5e-07, "loss": 0.0107, "step": 707 }, { "clip_ratio/high_max": 0.0018233523878734559, "clip_ratio/high_mean": 0.0006695105003018398, "clip_ratio/low_mean": 0.0006337708855426172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013032813476456795, "epoch": 7.335860058309038, "grad_norm": 0.13999217748641968, "learning_rate": 5e-07, "loss": 0.0484, "step": 708 }, { "clip_ratio/high_max": 0.002224668649432715, "clip_ratio/high_mean": 0.0008859037352522137, "clip_ratio/low_mean": 0.0004783982903973083, "clip_ratio/low_min": 1.1287700544926338e-05, "clip_ratio/region_mean": 0.001364302010188112, "epoch": 7.345189504373177, "grad_norm": 0.14023464918136597, "learning_rate": 5e-07, "loss": -0.0424, "step": 709 }, { "clip_ratio/high_max": 0.0019594880068325438, "clip_ratio/high_mean": 0.0007790784547978546, "clip_ratio/low_mean": 0.0006434627885028021, "clip_ratio/low_min": 7.534349970228504e-05, "clip_ratio/region_mean": 0.0014225412232917733, "epoch": 7.354518950437318, "grad_norm": 0.1435527503490448, "learning_rate": 5e-07, "loss": 0.0318, "step": 710 }, { "clip_ratio/high_max": 0.0018542874677223153, "clip_ratio/high_mean": 0.0006604698501178063, "clip_ratio/low_mean": 0.0005571776600845624, "clip_ratio/low_min": 3.2108968298416585e-05, "clip_ratio/region_mean": 0.0012176474992884323, "epoch": 7.363848396501457, "grad_norm": 0.14496690034866333, "learning_rate": 5e-07, "loss": 0.0263, "step": 711 }, { "clip_ratio/high_max": 0.0017640680089243688, "clip_ratio/high_mean": 0.0007880048833612818, "clip_ratio/low_mean": 0.0005195495286898222, "clip_ratio/low_min": 4.242579689162085e-05, "clip_ratio/region_mean": 0.0013075544193270616, "epoch": 7.373177842565598, "grad_norm": 0.1222783550620079, "learning_rate": 5e-07, "loss": -0.0504, "step": 712 }, { "clip_ratio/high_max": 0.0017686240826151334, "clip_ratio/high_mean": 0.000745764399653126, "clip_ratio/low_mean": 0.0006078406513552181, "clip_ratio/low_min": 1.2395874364301562e-05, "clip_ratio/region_mean": 0.0013536050428228918, "epoch": 7.382507288629737, "grad_norm": 0.13105787336826324, "learning_rate": 5e-07, "loss": 0.021, "step": 713 }, { "clip_ratio/high_max": 0.001861372929852223, "clip_ratio/high_mean": 0.0007954978536872659, "clip_ratio/low_mean": 0.0006796167272113962, "clip_ratio/low_min": 4.459998854144942e-05, "clip_ratio/region_mean": 0.001475114571803715, "epoch": 7.391836734693878, "grad_norm": 0.1105453222990036, "learning_rate": 5e-07, "loss": 0.0107, "step": 714 }, { "clip_ratio/high_max": 0.0019048139402002562, "clip_ratio/high_mean": 0.0007538733516412321, "clip_ratio/low_mean": 0.0006360745264828438, "clip_ratio/low_min": 1.0968760761898011e-05, "clip_ratio/region_mean": 0.001389947905408917, "epoch": 7.401166180758017, "grad_norm": 0.1273125261068344, "learning_rate": 5e-07, "loss": 0.0097, "step": 715 }, { "clip_ratio/high_max": 0.0019757725203817245, "clip_ratio/high_mean": 0.0008484673362545436, "clip_ratio/low_mean": 0.0005936947072768817, "clip_ratio/low_min": 1.4852661479380913e-05, "clip_ratio/region_mean": 0.001442162047169404, "epoch": 7.410495626822158, "grad_norm": 0.1268361359834671, "learning_rate": 5e-07, "loss": -0.0373, "step": 716 }, { "clip_ratio/high_max": 0.0019480034898151644, "clip_ratio/high_mean": 0.0007283143950189697, "clip_ratio/low_mean": 0.0006360183006108855, "clip_ratio/low_min": 1.0713061783462763e-05, "clip_ratio/region_mean": 0.0013643326674355194, "epoch": 7.419825072886297, "grad_norm": 0.12334601581096649, "learning_rate": 5e-07, "loss": -0.0184, "step": 717 }, { "clip_ratio/high_max": 0.0021732188906753436, "clip_ratio/high_mean": 0.0008191020588128595, "clip_ratio/low_mean": 0.0006967223871470196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001515824449597858, "epoch": 7.429154518950437, "grad_norm": 0.13267731666564941, "learning_rate": 5e-07, "loss": -0.0058, "step": 718 }, { "clip_ratio/high_max": 0.0016788371285656467, "clip_ratio/high_mean": 0.0008359386756637832, "clip_ratio/low_mean": 0.0006471426659118151, "clip_ratio/low_min": 2.471449533913983e-05, "clip_ratio/region_mean": 0.00148308136704145, "epoch": 7.438483965014577, "grad_norm": 0.12179408967494965, "learning_rate": 5e-07, "loss": 0.0132, "step": 719 }, { "clip_ratio/high_max": 0.0018742068496067077, "clip_ratio/high_mean": 0.0007756817667541327, "clip_ratio/low_mean": 0.0007622609737154562, "clip_ratio/low_min": 1.8805476429406554e-05, "clip_ratio/region_mean": 0.0015379427350126207, "epoch": 7.447813411078717, "grad_norm": 0.1389371007680893, "learning_rate": 5e-07, "loss": 0.0153, "step": 720 }, { "clip_ratio/high_max": 0.0021130163950147107, "clip_ratio/high_mean": 0.0008692275223438628, "clip_ratio/low_mean": 0.0007846824501029914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001653910007007653, "epoch": 7.457142857142857, "grad_norm": 0.13422991335391998, "learning_rate": 5e-07, "loss": -0.0089, "step": 721 }, { "clip_ratio/high_max": 0.002003047695325222, "clip_ratio/high_mean": 0.0008043483776418725, "clip_ratio/low_mean": 0.0008470849970763084, "clip_ratio/low_min": 0.00010062601268145954, "clip_ratio/region_mean": 0.001651433398365043, "epoch": 7.466472303206997, "grad_norm": 0.13871368765830994, "learning_rate": 5e-07, "loss": 0.0037, "step": 722 }, { "clip_ratio/high_max": 0.0021919717801210936, "clip_ratio/high_mean": 0.0008454814978904324, "clip_ratio/low_mean": 0.0008017318396014161, "clip_ratio/low_min": 1.3249947187432554e-05, "clip_ratio/region_mean": 0.0016472132774651982, "epoch": 7.475801749271137, "grad_norm": 0.12449768930673599, "learning_rate": 5e-07, "loss": 0.0176, "step": 723 }, { "clip_ratio/high_max": 0.001863447621872183, "clip_ratio/high_mean": 0.0008843653176882071, "clip_ratio/low_mean": 0.0007725716423010454, "clip_ratio/low_min": 3.674849358503707e-05, "clip_ratio/region_mean": 0.001656936961808242, "epoch": 7.485131195335277, "grad_norm": 0.12994295358657837, "learning_rate": 5e-07, "loss": -0.0373, "step": 724 }, { "clip_ratio/high_max": 0.0021050807954452466, "clip_ratio/high_mean": 0.0008482579905830789, "clip_ratio/low_mean": 0.0009240567014785483, "clip_ratio/low_min": 0.00012506757320807083, "clip_ratio/region_mean": 0.0017723147248034365, "epoch": 7.494460641399417, "grad_norm": 0.12953343987464905, "learning_rate": 5e-07, "loss": -0.0181, "step": 725 }, { "clip_ratio/high_max": 0.0017944640185305616, "clip_ratio/high_mean": 0.0007998018700163811, "clip_ratio/low_mean": 0.000762979376304429, "clip_ratio/low_min": 2.0345052689663135e-05, "clip_ratio/region_mean": 0.0015627812463208102, "epoch": 7.503790087463557, "grad_norm": 0.12040599435567856, "learning_rate": 5e-07, "loss": -0.0185, "step": 726 }, { "clip_ratio/high_max": 0.0020859700089204125, "clip_ratio/high_mean": 0.0008585448522353545, "clip_ratio/low_mean": 0.0009634615380491596, "clip_ratio/low_min": 5.7259550885646604e-05, "clip_ratio/region_mean": 0.0018220063975604717, "epoch": 7.513119533527696, "grad_norm": 0.13498982787132263, "learning_rate": 5e-07, "loss": 0.0023, "step": 727 }, { "clip_ratio/high_max": 0.0018648655168362893, "clip_ratio/high_mean": 0.0007935203175293282, "clip_ratio/low_mean": 0.000890058729055454, "clip_ratio/low_min": 6.995448711677454e-05, "clip_ratio/region_mean": 0.0016835790374898352, "epoch": 7.522448979591837, "grad_norm": 0.17624223232269287, "learning_rate": 5e-07, "loss": 0.0076, "step": 728 }, { "clip_ratio/high_max": 0.0018461643885530066, "clip_ratio/high_mean": 0.0007683982366870623, "clip_ratio/low_mean": 0.0007140281149986549, "clip_ratio/low_min": 1.468170103180455e-05, "clip_ratio/region_mean": 0.0014824263562331907, "epoch": 7.531778425655976, "grad_norm": 0.12624236941337585, "learning_rate": 5e-07, "loss": 0.0069, "step": 729 }, { "clip_ratio/high_max": 0.0020535213261609897, "clip_ratio/high_mean": 0.0007838863293727627, "clip_ratio/low_mean": 0.0007990113754203776, "clip_ratio/low_min": 5.619240255327895e-05, "clip_ratio/region_mean": 0.001582897690241225, "epoch": 7.541107871720117, "grad_norm": 0.12022233009338379, "learning_rate": 5e-07, "loss": -0.019, "step": 730 }, { "clip_ratio/high_max": 0.0021773394910269417, "clip_ratio/high_mean": 0.0008901856144802878, "clip_ratio/low_mean": 0.0007737879404885462, "clip_ratio/low_min": 1.842027631937526e-05, "clip_ratio/region_mean": 0.001663973554968834, "epoch": 7.550437317784256, "grad_norm": 0.12152099609375, "learning_rate": 5e-07, "loss": -0.0229, "step": 731 }, { "clip_ratio/high_max": 0.0021601863772957586, "clip_ratio/high_mean": 0.0008570047993998742, "clip_ratio/low_mean": 0.0008865304225764703, "clip_ratio/low_min": 3.9732972254569177e-05, "clip_ratio/region_mean": 0.0017435352274333127, "epoch": 7.559766763848397, "grad_norm": 0.1368025690317154, "learning_rate": 5e-07, "loss": 0.0181, "step": 732 }, { "clip_ratio/high_max": 0.0019977843112428673, "clip_ratio/high_mean": 0.0008270120470115216, "clip_ratio/low_mean": 0.0007251530078065116, "clip_ratio/low_min": 3.926278259314131e-05, "clip_ratio/region_mean": 0.0015521650420851074, "epoch": 7.569096209912536, "grad_norm": 0.13405822217464447, "learning_rate": 5e-07, "loss": -0.0406, "step": 733 }, { "clip_ratio/high_max": 0.0015988540144462604, "clip_ratio/high_mean": 0.0007071381951391231, "clip_ratio/low_mean": 0.0009434996281925123, "clip_ratio/low_min": 8.474888090859167e-05, "clip_ratio/region_mean": 0.001650637794227805, "epoch": 7.578425655976677, "grad_norm": 0.13117656111717224, "learning_rate": 5e-07, "loss": 0.0268, "step": 734 }, { "clip_ratio/high_max": 0.0019325747634866275, "clip_ratio/high_mean": 0.0007927963433758123, "clip_ratio/low_mean": 0.000843464928038884, "clip_ratio/low_min": 0.00011496978459035745, "clip_ratio/region_mean": 0.0016362612732336856, "epoch": 7.587755102040816, "grad_norm": 0.13740456104278564, "learning_rate": 5e-07, "loss": 0.0272, "step": 735 }, { "clip_ratio/high_max": 0.0019856835424434394, "clip_ratio/high_mean": 0.0008542969135305611, "clip_ratio/low_mean": 0.000708143827978347, "clip_ratio/low_min": 3.2998945243889466e-05, "clip_ratio/region_mean": 0.001562440738780424, "epoch": 7.597084548104956, "grad_norm": 0.1249769777059555, "learning_rate": 5e-07, "loss": -0.0375, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0271693638392857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 662.48779296875, "completions/mean_terminated_length": 566.5961303710938, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.606413994169096, "grad_norm": 0.13599242269992828, "learning_rate": 5e-07, "loss": 0.0319, "num_tokens": 452082378.0, "reward": 0.6130022406578064, "reward_std": 0.1714005321264267, "rewards/simpleverify_reward/mean": 0.6130022406578064, "rewards/simpleverify_reward/std": 0.48707160353660583, "step": 737 }, { "clip_ratio/high_max": 0.0018266130009578774, "clip_ratio/high_mean": 0.0006380230815921095, "clip_ratio/low_mean": 0.0005552276325033745, "clip_ratio/low_min": 1.252254060091218e-05, "clip_ratio/region_mean": 0.001193250707729021, "epoch": 7.615743440233236, "grad_norm": 0.12986230850219727, "learning_rate": 5e-07, "loss": 0.0426, "step": 738 }, { "clip_ratio/high_max": 0.001885077785118483, "clip_ratio/high_mean": 0.0006413157479983056, "clip_ratio/low_mean": 0.0004694556491813273, "clip_ratio/low_min": 1.7720441974233836e-05, "clip_ratio/region_mean": 0.0011107714053650852, "epoch": 7.625072886297376, "grad_norm": 0.12614381313323975, "learning_rate": 5e-07, "loss": 0.0002, "step": 739 }, { "clip_ratio/high_max": 0.001657494627579581, "clip_ratio/high_mean": 0.0007215668656499474, "clip_ratio/low_mean": 0.0005095221540614148, "clip_ratio/low_min": 1.699293170531746e-05, "clip_ratio/region_mean": 0.0012310890342632774, "epoch": 7.634402332361516, "grad_norm": 0.14001907408237457, "learning_rate": 5e-07, "loss": -0.012, "step": 740 }, { "clip_ratio/high_max": 0.0017627689740038477, "clip_ratio/high_mean": 0.0006685961579933064, "clip_ratio/low_mean": 0.0005919535560678924, "clip_ratio/low_min": 1.097261247196002e-05, "clip_ratio/region_mean": 0.0012605497213371564, "epoch": 7.643731778425656, "grad_norm": 0.12695817649364471, "learning_rate": 5e-07, "loss": 0.022, "step": 741 }, { "clip_ratio/high_max": 0.0018067084638460074, "clip_ratio/high_mean": 0.0007294486531463917, "clip_ratio/low_mean": 0.0006259334113565274, "clip_ratio/low_min": 2.407336978649255e-05, "clip_ratio/region_mean": 0.0013553820754168555, "epoch": 7.653061224489796, "grad_norm": 0.12787620723247528, "learning_rate": 5e-07, "loss": -0.008, "step": 742 }, { "clip_ratio/high_max": 0.001757364527293248, "clip_ratio/high_mean": 0.0007101648570824182, "clip_ratio/low_mean": 0.0005680982112608035, "clip_ratio/low_min": 3.826420470431913e-05, "clip_ratio/region_mean": 0.0012782630983565468, "epoch": 7.662390670553936, "grad_norm": 0.11964112520217896, "learning_rate": 5e-07, "loss": 0.0184, "step": 743 }, { "clip_ratio/high_max": 0.0018028208942268975, "clip_ratio/high_mean": 0.0007331542637984967, "clip_ratio/low_mean": 0.0005740113401770941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013071656139800325, "epoch": 7.671720116618076, "grad_norm": 0.1311533898115158, "learning_rate": 5e-07, "loss": -0.0185, "step": 744 }, { "clip_ratio/high_max": 0.0017416124537703581, "clip_ratio/high_mean": 0.0007445253977493849, "clip_ratio/low_mean": 0.0005325547672327957, "clip_ratio/low_min": 3.052503234357573e-05, "clip_ratio/region_mean": 0.0012770801804435905, "epoch": 7.681049562682215, "grad_norm": 0.13213203847408295, "learning_rate": 5e-07, "loss": -0.0005, "step": 745 }, { "clip_ratio/high_max": 0.0018410193442832679, "clip_ratio/high_mean": 0.0007374847227765713, "clip_ratio/low_mean": 0.0005851309033459984, "clip_ratio/low_min": 1.3839680832461454e-05, "clip_ratio/region_mean": 0.0013226156224845909, "epoch": 7.690379008746356, "grad_norm": 0.12901537120342255, "learning_rate": 5e-07, "loss": 0.0195, "step": 746 }, { "clip_ratio/high_max": 0.0018852336761483457, "clip_ratio/high_mean": 0.0007789949304424226, "clip_ratio/low_mean": 0.0005910275144742627, "clip_ratio/low_min": 4.944667671225034e-05, "clip_ratio/region_mean": 0.0013700224408239592, "epoch": 7.699708454810495, "grad_norm": 0.13171111047267914, "learning_rate": 5e-07, "loss": 0.0047, "step": 747 }, { "clip_ratio/high_max": 0.0018927500968857203, "clip_ratio/high_mean": 0.0007695140853911653, "clip_ratio/low_mean": 0.0006303899353952147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001399904052959755, "epoch": 7.709037900874636, "grad_norm": 0.1360795646905899, "learning_rate": 5e-07, "loss": -0.0206, "step": 748 }, { "clip_ratio/high_max": 0.002077238510537427, "clip_ratio/high_mean": 0.0008524779332219623, "clip_ratio/low_mean": 0.0006749470485374331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001527425010863226, "epoch": 7.718367346938775, "grad_norm": 0.12428892403841019, "learning_rate": 5e-07, "loss": -0.0042, "step": 749 }, { "clip_ratio/high_max": 0.0016942210495471954, "clip_ratio/high_mean": 0.0006639294824708486, "clip_ratio/low_mean": 0.0005651463243339094, "clip_ratio/low_min": 2.690486508072354e-05, "clip_ratio/region_mean": 0.0012290757986193057, "epoch": 7.727696793002916, "grad_norm": 0.13301442563533783, "learning_rate": 5e-07, "loss": -0.0104, "step": 750 }, { "clip_ratio/high_max": 0.0018931256017822307, "clip_ratio/high_mean": 0.0008106359982775757, "clip_ratio/low_mean": 0.0005749204510721029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013855564793630037, "epoch": 7.737026239067055, "grad_norm": 0.1255762279033661, "learning_rate": 5e-07, "loss": -0.0155, "step": 751 }, { "clip_ratio/high_max": 0.0018287595921719912, "clip_ratio/high_mean": 0.000737269203455071, "clip_ratio/low_mean": 0.0006408727103917045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001378141918394249, "epoch": 7.746355685131196, "grad_norm": 0.13148367404937744, "learning_rate": 5e-07, "loss": -0.012, "step": 752 }, { "clip_ratio/high_max": 0.0026908420040854253, "clip_ratio/high_mean": 0.000892487612873083, "clip_ratio/low_mean": 0.0005560777626669733, "clip_ratio/low_min": 1.5111218999663834e-05, "clip_ratio/region_mean": 0.0014485653991869185, "epoch": 7.755685131195335, "grad_norm": 0.13098150491714478, "learning_rate": 5e-07, "loss": -0.0298, "step": 753 }, { "clip_ratio/high_max": 0.001953427363332594, "clip_ratio/high_mean": 0.0008005052932276158, "clip_ratio/low_mean": 0.0007478125598936458, "clip_ratio/low_min": 6.833010229456704e-05, "clip_ratio/region_mean": 0.0015483178140129894, "epoch": 7.765014577259475, "grad_norm": 0.13069024682044983, "learning_rate": 5e-07, "loss": -0.0248, "step": 754 }, { "clip_ratio/high_max": 0.0019519149645930156, "clip_ratio/high_mean": 0.0007842837003408931, "clip_ratio/low_mean": 0.0006940334733371856, "clip_ratio/low_min": 6.79055328873801e-05, "clip_ratio/region_mean": 0.0014783171645831317, "epoch": 7.774344023323615, "grad_norm": 0.13007690012454987, "learning_rate": 5e-07, "loss": 0.0209, "step": 755 }, { "clip_ratio/high_max": 0.001864210338681005, "clip_ratio/high_mean": 0.000790376630902756, "clip_ratio/low_mean": 0.0006029960823070724, "clip_ratio/low_min": 1.0738831406342797e-05, "clip_ratio/region_mean": 0.0013933727459516376, "epoch": 7.783673469387755, "grad_norm": 0.12407657504081726, "learning_rate": 5e-07, "loss": -0.0478, "step": 756 }, { "clip_ratio/high_max": 0.0020951392871211283, "clip_ratio/high_mean": 0.000765114658861421, "clip_ratio/low_mean": 0.0007832832125131972, "clip_ratio/low_min": 1.25502010632772e-05, "clip_ratio/region_mean": 0.0015483978713746183, "epoch": 7.793002915451895, "grad_norm": 0.12409961223602295, "learning_rate": 5e-07, "loss": -0.0053, "step": 757 }, { "clip_ratio/high_max": 0.0016902227616810706, "clip_ratio/high_mean": 0.0006564016966876807, "clip_ratio/low_mean": 0.0007956806748552481, "clip_ratio/low_min": 8.7569106653973e-05, "clip_ratio/region_mean": 0.0014520823824568652, "epoch": 7.802332361516035, "grad_norm": 0.13650920987129211, "learning_rate": 5e-07, "loss": 0.0443, "step": 758 }, { "clip_ratio/high_max": 0.002121476049069315, "clip_ratio/high_mean": 0.0008105687975330511, "clip_ratio/low_mean": 0.0006972007413423853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015077695279615, "epoch": 7.811661807580175, "grad_norm": 0.12293445318937302, "learning_rate": 5e-07, "loss": 0.0346, "step": 759 }, { "clip_ratio/high_max": 0.002017543913098052, "clip_ratio/high_mean": 0.000777669584749674, "clip_ratio/low_mean": 0.0005847528263984714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013624224193335976, "epoch": 7.820991253644315, "grad_norm": 0.1222648099064827, "learning_rate": 5e-07, "loss": -0.0508, "step": 760 }, { "clip_ratio/high_max": 0.0020435613914742135, "clip_ratio/high_mean": 0.0008739793720451416, "clip_ratio/low_mean": 0.0007913542558526387, "clip_ratio/low_min": 6.124878927948885e-05, "clip_ratio/region_mean": 0.0016653336278977804, "epoch": 7.830320699708455, "grad_norm": 0.12571874260902405, "learning_rate": 5e-07, "loss": -0.0035, "step": 761 }, { "clip_ratio/high_max": 0.0018492736198822968, "clip_ratio/high_mean": 0.0008388848218601197, "clip_ratio/low_mean": 0.000766529054089915, "clip_ratio/low_min": 3.5145116271451116e-05, "clip_ratio/region_mean": 0.0016054138832259923, "epoch": 7.839650145772595, "grad_norm": 0.13505345582962036, "learning_rate": 5e-07, "loss": -0.0378, "step": 762 }, { "clip_ratio/high_max": 0.0018561322376626777, "clip_ratio/high_mean": 0.0007745738016637915, "clip_ratio/low_mean": 0.0008199448384402785, "clip_ratio/low_min": 5.558661450777436e-05, "clip_ratio/region_mean": 0.0015945186096359976, "epoch": 7.848979591836734, "grad_norm": 0.13730567693710327, "learning_rate": 5e-07, "loss": 0.0105, "step": 763 }, { "clip_ratio/high_max": 0.001871157881396357, "clip_ratio/high_mean": 0.0007553885698143858, "clip_ratio/low_mean": 0.0007302996582438936, "clip_ratio/low_min": 3.667001874418929e-05, "clip_ratio/region_mean": 0.001485688266257057, "epoch": 7.858309037900875, "grad_norm": 0.15619690716266632, "learning_rate": 5e-07, "loss": 0.0215, "step": 764 }, { "clip_ratio/high_max": 0.0015900580256129615, "clip_ratio/high_mean": 0.0006798801468903548, "clip_ratio/low_mean": 0.0007596706700496725, "clip_ratio/low_min": 4.857270323554985e-05, "clip_ratio/region_mean": 0.0014395508405868895, "epoch": 7.867638483965014, "grad_norm": 0.12826617062091827, "learning_rate": 5e-07, "loss": 0.015, "step": 765 }, { "clip_ratio/high_max": 0.001944733037817059, "clip_ratio/high_mean": 0.0008505684500050847, "clip_ratio/low_mean": 0.0007201975749921985, "clip_ratio/low_min": 1.4927155461919028e-05, "clip_ratio/region_mean": 0.0015707660750194918, "epoch": 7.876967930029155, "grad_norm": 0.12400184571743011, "learning_rate": 5e-07, "loss": -0.0266, "step": 766 }, { "clip_ratio/high_max": 0.002174273118725978, "clip_ratio/high_mean": 0.0008796147812972777, "clip_ratio/low_mean": 0.0007535659515269799, "clip_ratio/low_min": 1.5470297512365505e-05, "clip_ratio/region_mean": 0.001633180734643247, "epoch": 7.886297376093294, "grad_norm": 0.138499915599823, "learning_rate": 5e-07, "loss": -0.0314, "step": 767 }, { "clip_ratio/high_max": 0.0019930949056288227, "clip_ratio/high_mean": 0.0007961335381878598, "clip_ratio/low_mean": 0.0007440550125465961, "clip_ratio/low_min": 3.90107979910681e-05, "clip_ratio/region_mean": 0.0015401885393657722, "epoch": 7.895626822157435, "grad_norm": 0.12443424761295319, "learning_rate": 5e-07, "loss": -0.0151, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0303431919642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 683.8461303710938, "completions/mean_terminated_length": 577.0706176757812, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 8.00932944606414, "grad_norm": 0.1394636482000351, "learning_rate": 5e-07, "loss": -0.0165, "num_tokens": 471026127.0, "reward": 0.6114327907562256, "reward_std": 0.1766977161169052, "rewards/simpleverify_reward/mean": 0.6114327311515808, "rewards/simpleverify_reward/std": 0.48743313550949097, "step": 769 }, { "clip_ratio/high_max": 0.0019190556049579754, "clip_ratio/high_mean": 0.0007378833252005279, "clip_ratio/low_mean": 0.0003826498223133967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011205331393284723, "epoch": 8.018658892128279, "grad_norm": 0.12635411322116852, "learning_rate": 5e-07, "loss": -0.0059, "step": 770 }, { "clip_ratio/high_max": 0.002092363443807699, "clip_ratio/high_mean": 0.0008388076348637696, "clip_ratio/low_mean": 0.0005617848082692944, "clip_ratio/low_min": 1.4114724763203412e-05, "clip_ratio/region_mean": 0.001400592467689421, "epoch": 8.02798833819242, "grad_norm": 0.13650532066822052, "learning_rate": 5e-07, "loss": -0.0121, "step": 771 }, { "clip_ratio/high_max": 0.0016134693287312984, "clip_ratio/high_mean": 0.0007194171739683952, "clip_ratio/low_mean": 0.0005725819910367136, "clip_ratio/low_min": 2.619831320771482e-05, "clip_ratio/region_mean": 0.001291999167733593, "epoch": 8.03731778425656, "grad_norm": 0.13805267214775085, "learning_rate": 5e-07, "loss": 0.0234, "step": 772 }, { "clip_ratio/high_max": 0.00192030388279818, "clip_ratio/high_mean": 0.0008274582705780631, "clip_ratio/low_mean": 0.0005672292736562667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013946875624242239, "epoch": 8.0466472303207, "grad_norm": 0.11857301741838455, "learning_rate": 5e-07, "loss": -0.0461, "step": 773 }, { "clip_ratio/high_max": 0.0019483103533275425, "clip_ratio/high_mean": 0.0008711993468750734, "clip_ratio/low_mean": 0.0005562479664149578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014274473360273987, "epoch": 8.055976676384839, "grad_norm": 0.12111105024814606, "learning_rate": 5e-07, "loss": -0.0431, "step": 774 }, { "clip_ratio/high_max": 0.0017527498421259224, "clip_ratio/high_mean": 0.0007841778569854796, "clip_ratio/low_mean": 0.0006305834631348262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014147613219392952, "epoch": 8.06530612244898, "grad_norm": 0.13266827166080475, "learning_rate": 5e-07, "loss": -0.0202, "step": 775 }, { "clip_ratio/high_max": 0.001788480585673824, "clip_ratio/high_mean": 0.0008464172369713197, "clip_ratio/low_mean": 0.0005744681529904483, "clip_ratio/low_min": 1.097839412977919e-05, "clip_ratio/region_mean": 0.0014208853826858103, "epoch": 8.07463556851312, "grad_norm": 0.12604965269565582, "learning_rate": 5e-07, "loss": -0.0188, "step": 776 }, { "clip_ratio/high_max": 0.0021331867610570043, "clip_ratio/high_mean": 0.0008180353379430017, "clip_ratio/low_mean": 0.0005397652112151263, "clip_ratio/low_min": 2.524688079574844e-05, "clip_ratio/region_mean": 0.0013578005818999372, "epoch": 8.08396501457726, "grad_norm": 0.1360141485929489, "learning_rate": 5e-07, "loss": 0.0053, "step": 777 }, { "clip_ratio/high_max": 0.0019434315036050975, "clip_ratio/high_mean": 0.0008066536247497424, "clip_ratio/low_mean": 0.0005882760960957967, "clip_ratio/low_min": 4.125806844967883e-05, "clip_ratio/region_mean": 0.0013949297390354332, "epoch": 8.093294460641399, "grad_norm": 0.12722811102867126, "learning_rate": 5e-07, "loss": -0.0168, "step": 778 }, { "clip_ratio/high_max": 0.001753666190779768, "clip_ratio/high_mean": 0.0008105289161903784, "clip_ratio/low_mean": 0.0006266799246077426, "clip_ratio/low_min": 4.111165981157683e-05, "clip_ratio/region_mean": 0.0014372088480740786, "epoch": 8.102623906705539, "grad_norm": 0.13995255529880524, "learning_rate": 5e-07, "loss": 0.0025, "step": 779 }, { "clip_ratio/high_max": 0.001795403972209897, "clip_ratio/high_mean": 0.0007410571315631387, "clip_ratio/low_mean": 0.000691268276568735, "clip_ratio/low_min": 1.391052774124546e-05, "clip_ratio/region_mean": 0.0014323254217742942, "epoch": 8.11195335276968, "grad_norm": 0.14007779955863953, "learning_rate": 5e-07, "loss": -0.0053, "step": 780 }, { "clip_ratio/high_max": 0.001902902869915124, "clip_ratio/high_mean": 0.0008214407298510196, "clip_ratio/low_mean": 0.0006735991482855752, "clip_ratio/low_min": 6.32187020528363e-05, "clip_ratio/region_mean": 0.001495039865403669, "epoch": 8.12128279883382, "grad_norm": 0.13752102851867676, "learning_rate": 5e-07, "loss": -0.021, "step": 781 }, { "clip_ratio/high_max": 0.0020010443186038174, "clip_ratio/high_mean": 0.0007935262583487201, "clip_ratio/low_mean": 0.0006918598446645774, "clip_ratio/low_min": 6.283664515649434e-05, "clip_ratio/region_mean": 0.0014853861030132975, "epoch": 8.130612244897959, "grad_norm": 0.1346053034067154, "learning_rate": 5e-07, "loss": 0.0433, "step": 782 }, { "clip_ratio/high_max": 0.002006042552238796, "clip_ratio/high_mean": 0.0008443733204330783, "clip_ratio/low_mean": 0.0006268073702813126, "clip_ratio/low_min": 9.941148164216429e-06, "clip_ratio/region_mean": 0.0014711807125422638, "epoch": 8.139941690962099, "grad_norm": 0.12955701351165771, "learning_rate": 5e-07, "loss": -0.0306, "step": 783 }, { "clip_ratio/high_max": 0.001968941571249161, "clip_ratio/high_mean": 0.0007443742051691515, "clip_ratio/low_mean": 0.0007589341548737139, "clip_ratio/low_min": 2.117208714480512e-05, "clip_ratio/region_mean": 0.0015033083545858972, "epoch": 8.14927113702624, "grad_norm": 0.13942204415798187, "learning_rate": 5e-07, "loss": -0.0147, "step": 784 }, { "clip_ratio/high_max": 0.001969128647033358, "clip_ratio/high_mean": 0.0007942139145598048, "clip_ratio/low_mean": 0.0007555311131000053, "clip_ratio/low_min": 7.616404036525637e-05, "clip_ratio/region_mean": 0.0015497450513066724, "epoch": 8.15860058309038, "grad_norm": 0.13383273780345917, "learning_rate": 5e-07, "loss": 0.0249, "step": 785 }, { "clip_ratio/high_max": 0.0016492052491230424, "clip_ratio/high_mean": 0.0006815330980316503, "clip_ratio/low_mean": 0.0007789115629748267, "clip_ratio/low_min": 1.4633575119660236e-05, "clip_ratio/region_mean": 0.0014604446841985919, "epoch": 8.167930029154519, "grad_norm": 0.14017353951931, "learning_rate": 5e-07, "loss": 0.021, "step": 786 }, { "clip_ratio/high_max": 0.0018411266319162678, "clip_ratio/high_mean": 0.0007374915021500783, "clip_ratio/low_mean": 0.0006901588167238515, "clip_ratio/low_min": 1.6233765563811176e-05, "clip_ratio/region_mean": 0.0014276503134169616, "epoch": 8.177259475218658, "grad_norm": 0.11585410684347153, "learning_rate": 5e-07, "loss": 0.0171, "step": 787 }, { "clip_ratio/high_max": 0.0019193734624423087, "clip_ratio/high_mean": 0.0007489777854061686, "clip_ratio/low_mean": 0.0006159508102427935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013649285756400786, "epoch": 8.186588921282798, "grad_norm": 0.13112685084342957, "learning_rate": 5e-07, "loss": -0.032, "step": 788 }, { "clip_ratio/high_max": 0.001729588631860679, "clip_ratio/high_mean": 0.0006656769546680152, "clip_ratio/low_mean": 0.0007440154040523339, "clip_ratio/low_min": 5.2019086069776677e-05, "clip_ratio/region_mean": 0.0014096923478064127, "epoch": 8.19591836734694, "grad_norm": 0.11401207000017166, "learning_rate": 5e-07, "loss": 0.0381, "step": 789 }, { "clip_ratio/high_max": 0.002201643081207294, "clip_ratio/high_mean": 0.0008262639403255889, "clip_ratio/low_mean": 0.0008316520579683129, "clip_ratio/low_min": 3.793930409301538e-05, "clip_ratio/region_mean": 0.0016579160146648064, "epoch": 8.205247813411079, "grad_norm": 0.1404971182346344, "learning_rate": 5e-07, "loss": -0.0118, "step": 790 }, { "clip_ratio/high_max": 0.0019865247995767277, "clip_ratio/high_mean": 0.0008889189102774253, "clip_ratio/low_mean": 0.0006356614048854681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015245803151628934, "epoch": 8.214577259475218, "grad_norm": 0.13233354687690735, "learning_rate": 5e-07, "loss": -0.0242, "step": 791 }, { "clip_ratio/high_max": 0.002030518797255354, "clip_ratio/high_mean": 0.0008143621980707394, "clip_ratio/low_mean": 0.0007275681837199954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001541930378152756, "epoch": 8.223906705539358, "grad_norm": 0.14032001793384552, "learning_rate": 5e-07, "loss": -0.0238, "step": 792 }, { "clip_ratio/high_max": 0.002130035361915361, "clip_ratio/high_mean": 0.0008317740721395239, "clip_ratio/low_mean": 0.0008447771733699483, "clip_ratio/low_min": 4.933819491270697e-05, "clip_ratio/region_mean": 0.00167655123732402, "epoch": 8.2332361516035, "grad_norm": 0.14910650253295898, "learning_rate": 5e-07, "loss": 0.0336, "step": 793 }, { "clip_ratio/high_max": 0.0018239282653667033, "clip_ratio/high_mean": 0.000775136417360045, "clip_ratio/low_mean": 0.0007694121431995882, "clip_ratio/low_min": 0.00010303506223863224, "clip_ratio/region_mean": 0.001544548555102665, "epoch": 8.242565597667639, "grad_norm": 0.12835274636745453, "learning_rate": 5e-07, "loss": -0.0189, "step": 794 }, { "clip_ratio/high_max": 0.0020807060245715547, "clip_ratio/high_mean": 0.0008437320957455086, "clip_ratio/low_mean": 0.0007250835678860312, "clip_ratio/low_min": 9.922209756041411e-06, "clip_ratio/region_mean": 0.0015688156345277093, "epoch": 8.251895043731778, "grad_norm": 0.12312004715204239, "learning_rate": 5e-07, "loss": 0.0015, "step": 795 }, { "clip_ratio/high_max": 0.0018185959852417, "clip_ratio/high_mean": 0.0007757988096273039, "clip_ratio/low_mean": 0.0008335095080838073, "clip_ratio/low_min": 4.899079067399725e-05, "clip_ratio/region_mean": 0.0016093083104351535, "epoch": 8.261224489795918, "grad_norm": 0.11698324978351593, "learning_rate": 5e-07, "loss": 0.0381, "step": 796 }, { "clip_ratio/high_max": 0.00205194455338642, "clip_ratio/high_mean": 0.0008393405660171993, "clip_ratio/low_mean": 0.0007041422541078646, "clip_ratio/low_min": 4.0525834265281446e-05, "clip_ratio/region_mean": 0.0015434828310390003, "epoch": 8.270553935860057, "grad_norm": 0.13196368515491486, "learning_rate": 5e-07, "loss": -0.0013, "step": 797 }, { "clip_ratio/high_max": 0.0018412385106785223, "clip_ratio/high_mean": 0.0007622814118803944, "clip_ratio/low_mean": 0.0006426726813515415, "clip_ratio/low_min": 1.467480615247041e-05, "clip_ratio/region_mean": 0.0014049540914129466, "epoch": 8.279883381924199, "grad_norm": 0.1365317851305008, "learning_rate": 5e-07, "loss": -0.0359, "step": 798 }, { "clip_ratio/high_max": 0.002075249271001667, "clip_ratio/high_mean": 0.0008029841337702237, "clip_ratio/low_mean": 0.0007580886576761259, "clip_ratio/low_min": 4.8738011173554696e-05, "clip_ratio/region_mean": 0.0015610727787134238, "epoch": 8.289212827988338, "grad_norm": 0.12130186706781387, "learning_rate": 5e-07, "loss": -0.023, "step": 799 }, { "clip_ratio/high_max": 0.002087272943754215, "clip_ratio/high_mean": 0.0008904191327019362, "clip_ratio/low_mean": 0.0007238742437039036, "clip_ratio/low_min": 3.0869990041537676e-05, "clip_ratio/region_mean": 0.001614293421880575, "epoch": 8.298542274052478, "grad_norm": 0.12477964162826538, "learning_rate": 5e-07, "loss": -0.0251, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028948102678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 665.6625366210938, "completions/mean_terminated_length": 563.4004516601562, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 8.307871720116617, "grad_norm": 0.14373984932899475, "learning_rate": 5e-07, "loss": 0.0078, "num_tokens": 489563954.0, "reward": 0.6207798719406128, "reward_std": 0.17745546996593475, "rewards/simpleverify_reward/mean": 0.6207798719406128, "rewards/simpleverify_reward/std": 0.4852014482021332, "step": 801 }, { "clip_ratio/high_max": 0.0017846253504103515, "clip_ratio/high_mean": 0.000720587071555201, "clip_ratio/low_mean": 0.0004033915829495527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011239786763326265, "epoch": 8.317201166180759, "grad_norm": 0.12868689000606537, "learning_rate": 5e-07, "loss": -0.0169, "step": 802 }, { "clip_ratio/high_max": 0.001919325706694508, "clip_ratio/high_mean": 0.0008310047924169339, "clip_ratio/low_mean": 0.000542378546015243, "clip_ratio/low_min": 1.0766580089693889e-05, "clip_ratio/region_mean": 0.0013733833220612723, "epoch": 8.326530612244898, "grad_norm": 0.13509519398212433, "learning_rate": 5e-07, "loss": -0.0051, "step": 803 }, { "clip_ratio/high_max": 0.0019370091322343796, "clip_ratio/high_mean": 0.0007024985643511172, "clip_ratio/low_mean": 0.0005750586360591114, "clip_ratio/low_min": 1.9586335838539526e-05, "clip_ratio/region_mean": 0.00127755718131084, "epoch": 8.335860058309038, "grad_norm": 0.146087646484375, "learning_rate": 5e-07, "loss": 0.0084, "step": 804 }, { "clip_ratio/high_max": 0.0018647622564458288, "clip_ratio/high_mean": 0.0007883668477006722, "clip_ratio/low_mean": 0.0006177608838697779, "clip_ratio/low_min": 1.3329067769518588e-05, "clip_ratio/region_mean": 0.0014061276961001568, "epoch": 8.345189504373177, "grad_norm": 0.13925005495548248, "learning_rate": 5e-07, "loss": -0.0047, "step": 805 }, { "clip_ratio/high_max": 0.0017853803947218694, "clip_ratio/high_mean": 0.0007931824457045877, "clip_ratio/low_mean": 0.0005289710516080959, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013221534973126836, "epoch": 8.354518950437317, "grad_norm": 0.1427600383758545, "learning_rate": 5e-07, "loss": -0.0127, "step": 806 }, { "clip_ratio/high_max": 0.0019473125576041639, "clip_ratio/high_mean": 0.0007206095378933242, "clip_ratio/low_mean": 0.0006070363724575145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013276458921609446, "epoch": 8.363848396501458, "grad_norm": 0.16260258853435516, "learning_rate": 5e-07, "loss": -0.0102, "step": 807 }, { "clip_ratio/high_max": 0.002173138673242647, "clip_ratio/high_mean": 0.0009000747559184674, "clip_ratio/low_mean": 0.0006138351591289393, "clip_ratio/low_min": 4.623762197297765e-05, "clip_ratio/region_mean": 0.0015139099014049862, "epoch": 8.373177842565598, "grad_norm": 0.13936519622802734, "learning_rate": 5e-07, "loss": -0.0485, "step": 808 }, { "clip_ratio/high_max": 0.0021088282264827285, "clip_ratio/high_mean": 0.0007840760154067539, "clip_ratio/low_mean": 0.000612933670709026, "clip_ratio/low_min": 3.700652996485587e-05, "clip_ratio/region_mean": 0.0013970096661068965, "epoch": 8.382507288629737, "grad_norm": 0.13707216084003448, "learning_rate": 5e-07, "loss": 0.0073, "step": 809 }, { "clip_ratio/high_max": 0.0019994621907244436, "clip_ratio/high_mean": 0.0008079940816969611, "clip_ratio/low_mean": 0.0005429392845144321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013509333912224974, "epoch": 8.391836734693877, "grad_norm": 0.13411925733089447, "learning_rate": 5e-07, "loss": -0.0206, "step": 810 }, { "clip_ratio/high_max": 0.0019325854700582568, "clip_ratio/high_mean": 0.0008295658972201636, "clip_ratio/low_mean": 0.0006171386048663408, "clip_ratio/low_min": 4.551026904664468e-05, "clip_ratio/region_mean": 0.0014467044748016633, "epoch": 8.401166180758018, "grad_norm": 0.13991990685462952, "learning_rate": 5e-07, "loss": 0.0003, "step": 811 }, { "clip_ratio/high_max": 0.001760114886565134, "clip_ratio/high_mean": 0.0008273670018752455, "clip_ratio/low_mean": 0.0006509369904961204, "clip_ratio/low_min": 1.746297857607715e-05, "clip_ratio/region_mean": 0.0014783039696339983, "epoch": 8.410495626822158, "grad_norm": 0.12844477593898773, "learning_rate": 5e-07, "loss": -0.0076, "step": 812 }, { "clip_ratio/high_max": 0.0017940218021976762, "clip_ratio/high_mean": 0.0007558224406238878, "clip_ratio/low_mean": 0.0007106367565938854, "clip_ratio/low_min": 2.9802985409332905e-05, "clip_ratio/region_mean": 0.0014664591908513103, "epoch": 8.419825072886297, "grad_norm": 0.13220706582069397, "learning_rate": 5e-07, "loss": 0.0171, "step": 813 }, { "clip_ratio/high_max": 0.0020020942538394593, "clip_ratio/high_mean": 0.0007942228894535219, "clip_ratio/low_mean": 0.0006690963564324193, "clip_ratio/low_min": 0.00011255280060140649, "clip_ratio/region_mean": 0.0014633192258770578, "epoch": 8.429154518950437, "grad_norm": 0.135232076048851, "learning_rate": 5e-07, "loss": 0.0251, "step": 814 }, { "clip_ratio/high_max": 0.001886857789941132, "clip_ratio/high_mean": 0.0007793831355229486, "clip_ratio/low_mean": 0.0007052756373013835, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.001484658754634438, "epoch": 8.438483965014576, "grad_norm": 0.13630563020706177, "learning_rate": 5e-07, "loss": 0.0132, "step": 815 }, { "clip_ratio/high_max": 0.0016996378399198875, "clip_ratio/high_mean": 0.0007053961935525876, "clip_ratio/low_mean": 0.0006812772044213489, "clip_ratio/low_min": 2.153740842913976e-05, "clip_ratio/region_mean": 0.0013866734043403994, "epoch": 8.447813411078718, "grad_norm": 0.13325923681259155, "learning_rate": 5e-07, "loss": 0.0175, "step": 816 }, { "clip_ratio/high_max": 0.0020938017769367434, "clip_ratio/high_mean": 0.0007650214383829734, "clip_ratio/low_mean": 0.0007420352494591498, "clip_ratio/low_min": 7.22075164958369e-05, "clip_ratio/region_mean": 0.001507056673290208, "epoch": 8.457142857142857, "grad_norm": 0.12129195034503937, "learning_rate": 5e-07, "loss": -0.005, "step": 817 }, { "clip_ratio/high_max": 0.0019980037686764263, "clip_ratio/high_mean": 0.0008252007883129409, "clip_ratio/low_mean": 0.0006575218030775432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001482722564105643, "epoch": 8.466472303206997, "grad_norm": 0.13669809699058533, "learning_rate": 5e-07, "loss": -0.0494, "step": 818 }, { "clip_ratio/high_max": 0.002200307513703592, "clip_ratio/high_mean": 0.0008442564849246992, "clip_ratio/low_mean": 0.0007729924654995557, "clip_ratio/low_min": 0.00011668423758237623, "clip_ratio/region_mean": 0.0016172490031749476, "epoch": 8.475801749271136, "grad_norm": 0.13213346898555756, "learning_rate": 5e-07, "loss": 0.0293, "step": 819 }, { "clip_ratio/high_max": 0.0022741283537470736, "clip_ratio/high_mean": 0.0009540514656691812, "clip_ratio/low_mean": 0.0007532401159551227, "clip_ratio/low_min": 4.702698061009869e-05, "clip_ratio/region_mean": 0.0017072915943572298, "epoch": 8.485131195335278, "grad_norm": 0.13983261585235596, "learning_rate": 5e-07, "loss": 0.0093, "step": 820 }, { "clip_ratio/high_max": 0.0019967573643953074, "clip_ratio/high_mean": 0.0008171971530828159, "clip_ratio/low_mean": 0.0006883468731757603, "clip_ratio/low_min": 1.6738082194933668e-05, "clip_ratio/region_mean": 0.0015055440162541345, "epoch": 8.494460641399417, "grad_norm": 0.12925821542739868, "learning_rate": 5e-07, "loss": -0.0432, "step": 821 }, { "clip_ratio/high_max": 0.0020240224039298482, "clip_ratio/high_mean": 0.0008136288088280708, "clip_ratio/low_mean": 0.0008015233470359817, "clip_ratio/low_min": 6.786995982110966e-05, "clip_ratio/region_mean": 0.0016151521485880949, "epoch": 8.503790087463557, "grad_norm": 0.14422331750392914, "learning_rate": 5e-07, "loss": -0.015, "step": 822 }, { "clip_ratio/high_max": 0.0017591604409972206, "clip_ratio/high_mean": 0.0007471633671229938, "clip_ratio/low_mean": 0.0008441882218903629, "clip_ratio/low_min": 4.7684072342235595e-05, "clip_ratio/region_mean": 0.0015913516544969752, "epoch": 8.513119533527696, "grad_norm": 0.1240537092089653, "learning_rate": 5e-07, "loss": 0.0488, "step": 823 }, { "clip_ratio/high_max": 0.0019030074181500822, "clip_ratio/high_mean": 0.0007360860199696617, "clip_ratio/low_mean": 0.0007285277624760056, "clip_ratio/low_min": 2.2463286768470425e-05, "clip_ratio/region_mean": 0.0014646137751697097, "epoch": 8.522448979591836, "grad_norm": 0.13675837218761444, "learning_rate": 5e-07, "loss": 0.011, "step": 824 }, { "clip_ratio/high_max": 0.0021265347713779192, "clip_ratio/high_mean": 0.0007984602598298807, "clip_ratio/low_mean": 0.0006725205603288487, "clip_ratio/low_min": 1.628028076083865e-05, "clip_ratio/region_mean": 0.0014709807946928777, "epoch": 8.531778425655977, "grad_norm": 0.12705661356449127, "learning_rate": 5e-07, "loss": -0.0192, "step": 825 }, { "clip_ratio/high_max": 0.0019770076833083294, "clip_ratio/high_mean": 0.0008529667593393242, "clip_ratio/low_mean": 0.0008049933167058043, "clip_ratio/low_min": 2.0545694496831857e-05, "clip_ratio/region_mean": 0.0016579600487602875, "epoch": 8.541107871720117, "grad_norm": 0.13024455308914185, "learning_rate": 5e-07, "loss": 0.0035, "step": 826 }, { "clip_ratio/high_max": 0.002104375933413394, "clip_ratio/high_mean": 0.0009194729791488498, "clip_ratio/low_mean": 0.000684273842125549, "clip_ratio/low_min": 4.116525724384701e-05, "clip_ratio/region_mean": 0.0016037468121794518, "epoch": 8.550437317784256, "grad_norm": 0.13043439388275146, "learning_rate": 5e-07, "loss": -0.0101, "step": 827 }, { "clip_ratio/high_max": 0.0023560234112665057, "clip_ratio/high_mean": 0.0008967582652985584, "clip_ratio/low_mean": 0.0005952318515483057, "clip_ratio/low_min": 2.8132033548899926e-05, "clip_ratio/region_mean": 0.0014919901404937264, "epoch": 8.559766763848396, "grad_norm": 0.12981277704238892, "learning_rate": 5e-07, "loss": -0.0016, "step": 828 }, { "clip_ratio/high_max": 0.0021311673808668274, "clip_ratio/high_mean": 0.0008856263484631199, "clip_ratio/low_mean": 0.000703894439538999, "clip_ratio/low_min": 5.173770750843687e-05, "clip_ratio/region_mean": 0.0015895207761786878, "epoch": 8.569096209912537, "grad_norm": 0.1488371044397354, "learning_rate": 5e-07, "loss": 0.003, "step": 829 }, { "clip_ratio/high_max": 0.0019573866338760126, "clip_ratio/high_mean": 0.0007415474319714122, "clip_ratio/low_mean": 0.0007652242202311754, "clip_ratio/low_min": 2.436647264403291e-05, "clip_ratio/region_mean": 0.0015067716958583333, "epoch": 8.578425655976677, "grad_norm": 0.13440746068954468, "learning_rate": 5e-07, "loss": 0.0155, "step": 830 }, { "clip_ratio/high_max": 0.0016827628169266973, "clip_ratio/high_mean": 0.0007023885555099696, "clip_ratio/low_mean": 0.0006761288314010017, "clip_ratio/low_min": 4.829276167583885e-05, "clip_ratio/region_mean": 0.0013785173614451196, "epoch": 8.587755102040816, "grad_norm": 0.13991880416870117, "learning_rate": 5e-07, "loss": -0.0239, "step": 831 }, { "clip_ratio/high_max": 0.0020875827794952784, "clip_ratio/high_mean": 0.0008547638335585361, "clip_ratio/low_mean": 0.000754608335228113, "clip_ratio/low_min": 1.1330674169585109e-05, "clip_ratio/region_mean": 0.0016093721787910908, "epoch": 8.597084548104956, "grad_norm": 0.13838982582092285, "learning_rate": 5e-07, "loss": -0.0014, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0330636160714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 688.3814697265625, "completions/mean_terminated_length": 571.860595703125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 8.606413994169095, "grad_norm": 0.13278871774673462, "learning_rate": 5e-07, "loss": -0.0204, "num_tokens": 508236777.0, "reward": 0.615234375, "reward_std": 0.17091946303844452, "rewards/simpleverify_reward/mean": 0.615234375, "rewards/simpleverify_reward/std": 0.48654836416244507, "step": 833 }, { "clip_ratio/high_max": 0.0020279996206227224, "clip_ratio/high_mean": 0.0007826467353879707, "clip_ratio/low_mean": 0.0005238888443273026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001306535577896284, "epoch": 8.615743440233237, "grad_norm": 0.1288401335477829, "learning_rate": 5e-07, "loss": 0.0181, "step": 834 }, { "clip_ratio/high_max": 0.0017460692979511805, "clip_ratio/high_mean": 0.0007158365970099112, "clip_ratio/low_mean": 0.0006141469057183713, "clip_ratio/low_min": 2.3447757484973408e-05, "clip_ratio/region_mean": 0.001329983500909293, "epoch": 8.625072886297376, "grad_norm": 0.1259901374578476, "learning_rate": 5e-07, "loss": 0.0069, "step": 835 }, { "clip_ratio/high_max": 0.0018817018499248661, "clip_ratio/high_mean": 0.000793457964391564, "clip_ratio/low_mean": 0.0006574198405360221, "clip_ratio/low_min": 2.1987687432556413e-05, "clip_ratio/region_mean": 0.0014508778403978795, "epoch": 8.634402332361516, "grad_norm": 0.13653546571731567, "learning_rate": 5e-07, "loss": -0.0215, "step": 836 }, { "clip_ratio/high_max": 0.0016827657163958065, "clip_ratio/high_mean": 0.0007368241804215359, "clip_ratio/low_mean": 0.0005744870290982362, "clip_ratio/low_min": 7.901523531472776e-05, "clip_ratio/region_mean": 0.0013113112363498658, "epoch": 8.643731778425655, "grad_norm": 0.13388781249523163, "learning_rate": 5e-07, "loss": 0.0365, "step": 837 }, { "clip_ratio/high_max": 0.0015813795507710893, "clip_ratio/high_mean": 0.0005702072903659428, "clip_ratio/low_mean": 0.0005160731561772991, "clip_ratio/low_min": 2.5368089154653717e-05, "clip_ratio/region_mean": 0.0010862804447242524, "epoch": 8.653061224489797, "grad_norm": 0.13274253904819489, "learning_rate": 5e-07, "loss": 0.0095, "step": 838 }, { "clip_ratio/high_max": 0.002160514035494998, "clip_ratio/high_mean": 0.0008241908635682194, "clip_ratio/low_mean": 0.0005554605750148767, "clip_ratio/low_min": 2.0347593817859888e-05, "clip_ratio/region_mean": 0.0013796514249406755, "epoch": 8.662390670553936, "grad_norm": 0.15386709570884705, "learning_rate": 5e-07, "loss": -0.0726, "step": 839 }, { "clip_ratio/high_max": 0.0019947329128626734, "clip_ratio/high_mean": 0.0007594075796077959, "clip_ratio/low_mean": 0.0005517105255421484, "clip_ratio/low_min": 1.4630149962613359e-05, "clip_ratio/region_mean": 0.0013111180887790397, "epoch": 8.671720116618076, "grad_norm": 0.136003777384758, "learning_rate": 5e-07, "loss": -0.0297, "step": 840 }, { "clip_ratio/high_max": 0.0018153917990275659, "clip_ratio/high_mean": 0.0007201449643616797, "clip_ratio/low_mean": 0.000546955362551671, "clip_ratio/low_min": 1.6233765563811176e-05, "clip_ratio/region_mean": 0.001267100353288697, "epoch": 8.681049562682215, "grad_norm": 0.13887770473957062, "learning_rate": 5e-07, "loss": 0.0289, "step": 841 }, { "clip_ratio/high_max": 0.0014818114177614916, "clip_ratio/high_mean": 0.000599599279667018, "clip_ratio/low_mean": 0.0006518700538435951, "clip_ratio/low_min": 4.929538317810511e-05, "clip_ratio/region_mean": 0.0012514693298726343, "epoch": 8.690379008746355, "grad_norm": 0.1375890076160431, "learning_rate": 5e-07, "loss": 0.0398, "step": 842 }, { "clip_ratio/high_max": 0.001542226431411109, "clip_ratio/high_mean": 0.0006956333800189896, "clip_ratio/low_mean": 0.0006521989798784489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013478323708113749, "epoch": 8.699708454810496, "grad_norm": 0.11311003565788269, "learning_rate": 5e-07, "loss": 0.0195, "step": 843 }, { "clip_ratio/high_max": 0.0019438035815255716, "clip_ratio/high_mean": 0.0008266453132819152, "clip_ratio/low_mean": 0.0006123228595242836, "clip_ratio/low_min": 1.904326563817449e-05, "clip_ratio/region_mean": 0.0014389681891771033, "epoch": 8.709037900874636, "grad_norm": 0.13151466846466064, "learning_rate": 5e-07, "loss": -0.0162, "step": 844 }, { "clip_ratio/high_max": 0.0018700793334573973, "clip_ratio/high_mean": 0.0006907752413098933, "clip_ratio/low_mean": 0.000547097074559133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012378723004076164, "epoch": 8.718367346938775, "grad_norm": 0.13064418733119965, "learning_rate": 5e-07, "loss": 0.0245, "step": 845 }, { "clip_ratio/high_max": 0.0018923502211691812, "clip_ratio/high_mean": 0.0007365757792285876, "clip_ratio/low_mean": 0.00064025960273284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013768354183412157, "epoch": 8.727696793002915, "grad_norm": 0.15131869912147522, "learning_rate": 5e-07, "loss": -0.0063, "step": 846 }, { "clip_ratio/high_max": 0.002000587461225223, "clip_ratio/high_mean": 0.000791814145486569, "clip_ratio/low_mean": 0.0005147508054506034, "clip_ratio/low_min": 4.576527499011718e-05, "clip_ratio/region_mean": 0.0013065649327472784, "epoch": 8.737026239067056, "grad_norm": 0.12046334892511368, "learning_rate": 5e-07, "loss": -0.011, "step": 847 }, { "clip_ratio/high_max": 0.0020245042178430595, "clip_ratio/high_mean": 0.0007793967879479169, "clip_ratio/low_mean": 0.000522854348673718, "clip_ratio/low_min": 2.149243482563179e-05, "clip_ratio/region_mean": 0.0013022511739109177, "epoch": 8.746355685131196, "grad_norm": 0.12489102780818939, "learning_rate": 5e-07, "loss": -0.044, "step": 848 }, { "clip_ratio/high_max": 0.0018592833912407514, "clip_ratio/high_mean": 0.0007412920931528788, "clip_ratio/low_mean": 0.0006021478284310433, "clip_ratio/low_min": 3.408926113479538e-05, "clip_ratio/region_mean": 0.0013434398715617135, "epoch": 8.755685131195335, "grad_norm": 0.1299588978290558, "learning_rate": 5e-07, "loss": -0.0187, "step": 849 }, { "clip_ratio/high_max": 0.0016929438352235593, "clip_ratio/high_mean": 0.000634989059108193, "clip_ratio/low_mean": 0.0005528201891138451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011878092591359746, "epoch": 8.765014577259475, "grad_norm": 0.13043871521949768, "learning_rate": 5e-07, "loss": -0.0288, "step": 850 }, { "clip_ratio/high_max": 0.0015294950280804187, "clip_ratio/high_mean": 0.0005973727265882189, "clip_ratio/low_mean": 0.0008329753236466786, "clip_ratio/low_min": 7.028906748018926e-05, "clip_ratio/region_mean": 0.001430348045687424, "epoch": 8.774344023323614, "grad_norm": 0.1354091912508011, "learning_rate": 5e-07, "loss": 0.0308, "step": 851 }, { "clip_ratio/high_max": 0.002431817123579094, "clip_ratio/high_mean": 0.0008864804494805867, "clip_ratio/low_mean": 0.0006342982251226204, "clip_ratio/low_min": 5.875864371773787e-05, "clip_ratio/region_mean": 0.0015207786673272494, "epoch": 8.783673469387756, "grad_norm": 0.12643390893936157, "learning_rate": 5e-07, "loss": -0.0684, "step": 852 }, { "clip_ratio/high_max": 0.0019100528916169424, "clip_ratio/high_mean": 0.0007581573900097283, "clip_ratio/low_mean": 0.0007423452188959345, "clip_ratio/low_min": 1.270841767109232e-05, "clip_ratio/region_mean": 0.0015005026216385886, "epoch": 8.793002915451895, "grad_norm": 0.13532866537570953, "learning_rate": 5e-07, "loss": -0.0272, "step": 853 }, { "clip_ratio/high_max": 0.0019115519389742985, "clip_ratio/high_mean": 0.0007835065971448785, "clip_ratio/low_mean": 0.0007358292696153512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001519335841294378, "epoch": 8.802332361516035, "grad_norm": 0.14631466567516327, "learning_rate": 5e-07, "loss": 0.038, "step": 854 }, { "clip_ratio/high_max": 0.0018609875041875057, "clip_ratio/high_mean": 0.0007155822167987935, "clip_ratio/low_mean": 0.0006063324635761091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013219146894698497, "epoch": 8.811661807580174, "grad_norm": 0.13169145584106445, "learning_rate": 5e-07, "loss": -0.0299, "step": 855 }, { "clip_ratio/high_max": 0.0019228184282837901, "clip_ratio/high_mean": 0.0007716766767771333, "clip_ratio/low_mean": 0.0007351735239353729, "clip_ratio/low_min": 1.7908309018821456e-05, "clip_ratio/region_mean": 0.0015068501816131175, "epoch": 8.820991253644316, "grad_norm": 0.12114932388067245, "learning_rate": 5e-07, "loss": -0.0134, "step": 856 }, { "clip_ratio/high_max": 0.0019953275623265654, "clip_ratio/high_mean": 0.0008595272174716229, "clip_ratio/low_mean": 0.0006717155938531505, "clip_ratio/low_min": 2.5938990802387707e-05, "clip_ratio/region_mean": 0.0015312428076867945, "epoch": 8.830320699708455, "grad_norm": 1.8799233436584473, "learning_rate": 5e-07, "loss": 0.009, "step": 857 }, { "clip_ratio/high_max": 0.002088390800054185, "clip_ratio/high_mean": 0.0007566984604636673, "clip_ratio/low_mean": 0.000688044480739336, "clip_ratio/low_min": 1.2588116987899411e-05, "clip_ratio/region_mean": 0.0014447429348365404, "epoch": 8.839650145772595, "grad_norm": 0.12069337069988251, "learning_rate": 5e-07, "loss": 0.002, "step": 858 }, { "clip_ratio/high_max": 0.0019465526893327478, "clip_ratio/high_mean": 0.0007313184323720634, "clip_ratio/low_mean": 0.0008276054113594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00155892383918399, "epoch": 8.848979591836734, "grad_norm": 0.12219434231519699, "learning_rate": 5e-07, "loss": 0.0068, "step": 859 }, { "clip_ratio/high_max": 0.0018146892616641708, "clip_ratio/high_mean": 0.0007778431054248358, "clip_ratio/low_mean": 0.0007935641460790066, "clip_ratio/low_min": 6.83999987813877e-05, "clip_ratio/region_mean": 0.001571407265146263, "epoch": 8.858309037900874, "grad_norm": 0.13540683686733246, "learning_rate": 5e-07, "loss": -0.0071, "step": 860 }, { "clip_ratio/high_max": 0.0015102748293429613, "clip_ratio/high_mean": 0.0006638288059548358, "clip_ratio/low_mean": 0.0007877899843151681, "clip_ratio/low_min": 2.0681667592725717e-05, "clip_ratio/region_mean": 0.0014516188166453503, "epoch": 8.867638483965015, "grad_norm": 0.12847977876663208, "learning_rate": 5e-07, "loss": 0.0132, "step": 861 }, { "clip_ratio/high_max": 0.0017181826297019143, "clip_ratio/high_mean": 0.0006958041467441944, "clip_ratio/low_mean": 0.0007267704077094095, "clip_ratio/low_min": 8.747375431994442e-06, "clip_ratio/region_mean": 0.0014225745508156251, "epoch": 8.876967930029155, "grad_norm": 0.15326878428459167, "learning_rate": 5e-07, "loss": -0.0213, "step": 862 }, { "clip_ratio/high_max": 0.0020340102128102444, "clip_ratio/high_mean": 0.0007582021607959177, "clip_ratio/low_mean": 0.000957312959144474, "clip_ratio/low_min": 9.433001923753181e-05, "clip_ratio/region_mean": 0.0017155151035694871, "epoch": 8.886297376093294, "grad_norm": 0.1440555602312088, "learning_rate": 5e-07, "loss": 0.0203, "step": 863 }, { "clip_ratio/high_max": 0.0019764304015552625, "clip_ratio/high_mean": 0.0007602619243698427, "clip_ratio/low_mean": 0.0009317880176240578, "clip_ratio/low_min": 4.4324704504106194e-05, "clip_ratio/region_mean": 0.0016920499474508688, "epoch": 8.895626822157434, "grad_norm": 0.13154377043247223, "learning_rate": 5e-07, "loss": -0.0089, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0326450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 692.5234985351562, "completions/mean_terminated_length": 577.6671142578125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 9.00932944606414, "grad_norm": 0.14694395661354065, "learning_rate": 5e-07, "loss": -0.0381, "num_tokens": 527161080.0, "reward": 0.6107003688812256, "reward_std": 0.18123261630535126, "rewards/simpleverify_reward/mean": 0.6107003092765808, "rewards/simpleverify_reward/std": 0.4875999987125397, "step": 865 }, { "clip_ratio/high_max": 0.001744835804856848, "clip_ratio/high_mean": 0.0007376246176136192, "clip_ratio/low_mean": 0.0005304792493916466, "clip_ratio/low_min": 1.2037750821036752e-05, "clip_ratio/region_mean": 0.001268103857000824, "epoch": 9.018658892128279, "grad_norm": 0.1420261561870575, "learning_rate": 5e-07, "loss": 0.003, "step": 866 }, { "clip_ratio/high_max": 0.0017449141159886494, "clip_ratio/high_mean": 0.0007399589358101366, "clip_ratio/low_mean": 0.0005967069537291536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013366658640734386, "epoch": 9.02798833819242, "grad_norm": 0.13632728159427643, "learning_rate": 5e-07, "loss": -0.0145, "step": 867 }, { "clip_ratio/high_max": 0.001663191451370949, "clip_ratio/high_mean": 0.0006313407575362362, "clip_ratio/low_mean": 0.0005732837717005168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012046245028614067, "epoch": 9.03731778425656, "grad_norm": 0.13583429157733917, "learning_rate": 5e-07, "loss": 0.032, "step": 868 }, { "clip_ratio/high_max": 0.0018597211928863544, "clip_ratio/high_mean": 0.0008124405703711091, "clip_ratio/low_mean": 0.0005289227829052834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013413633387244772, "epoch": 9.0466472303207, "grad_norm": 0.13684703409671783, "learning_rate": 5e-07, "loss": -0.0327, "step": 869 }, { "clip_ratio/high_max": 0.0019069834015681408, "clip_ratio/high_mean": 0.0007749852502456633, "clip_ratio/low_mean": 0.0004997815985916532, "clip_ratio/low_min": 1.156122834800044e-05, "clip_ratio/region_mean": 0.001274766862479737, "epoch": 9.055976676384839, "grad_norm": 0.12057965993881226, "learning_rate": 5e-07, "loss": -0.0217, "step": 870 }, { "clip_ratio/high_max": 0.0018559238233137876, "clip_ratio/high_mean": 0.0007179983549576718, "clip_ratio/low_mean": 0.0006143311047708266, "clip_ratio/low_min": 1.2575452274177223e-05, "clip_ratio/region_mean": 0.0013323294442670885, "epoch": 9.06530612244898, "grad_norm": 0.14592739939689636, "learning_rate": 5e-07, "loss": 0.0038, "step": 871 }, { "clip_ratio/high_max": 0.001943149200087646, "clip_ratio/high_mean": 0.0007987646094989032, "clip_ratio/low_mean": 0.00047760808729435666, "clip_ratio/low_min": 1.1277516932750586e-05, "clip_ratio/region_mean": 0.001276372673601145, "epoch": 9.07463556851312, "grad_norm": 0.1294470578432083, "learning_rate": 5e-07, "loss": -0.0418, "step": 872 }, { "clip_ratio/high_max": 0.0022237944904190954, "clip_ratio/high_mean": 0.0008814769826130942, "clip_ratio/low_mean": 0.0006343162840494188, "clip_ratio/low_min": 3.57792141585378e-05, "clip_ratio/region_mean": 0.0015157932648435235, "epoch": 9.08396501457726, "grad_norm": 0.1385689228773117, "learning_rate": 5e-07, "loss": -0.0294, "step": 873 }, { "clip_ratio/high_max": 0.0016879135655472055, "clip_ratio/high_mean": 0.0006486183101515053, "clip_ratio/low_mean": 0.0006204499441082589, "clip_ratio/low_min": 1.6901027265703306e-05, "clip_ratio/region_mean": 0.001269068285182584, "epoch": 9.093294460641399, "grad_norm": 1.5891700983047485, "learning_rate": 5e-07, "loss": 0.0275, "step": 874 }, { "clip_ratio/high_max": 0.0017674374012131011, "clip_ratio/high_mean": 0.0007034009340713965, "clip_ratio/low_mean": 0.0006060824225642136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001309483326622285, "epoch": 9.102623906705539, "grad_norm": 0.14349429309368134, "learning_rate": 5e-07, "loss": 0.0155, "step": 875 }, { "clip_ratio/high_max": 0.0017161509094876237, "clip_ratio/high_mean": 0.0008205525246012257, "clip_ratio/low_mean": 0.0007267993823916186, "clip_ratio/low_min": 3.468119939498138e-05, "clip_ratio/region_mean": 0.0015473519306397066, "epoch": 9.11195335276968, "grad_norm": 0.1431819498538971, "learning_rate": 5e-07, "loss": -0.0136, "step": 876 }, { "clip_ratio/high_max": 0.0017955430921574589, "clip_ratio/high_mean": 0.0007004843373579206, "clip_ratio/low_mean": 0.000672166464937618, "clip_ratio/low_min": 3.804788866546005e-05, "clip_ratio/region_mean": 0.001372650822304422, "epoch": 9.12128279883382, "grad_norm": 0.15355883538722992, "learning_rate": 5e-07, "loss": 0.001, "step": 877 }, { "clip_ratio/high_max": 0.0021200265873630997, "clip_ratio/high_mean": 0.0008176510127668735, "clip_ratio/low_mean": 0.0007436506602971349, "clip_ratio/low_min": 4.4516367779579014e-05, "clip_ratio/region_mean": 0.0015613016585120931, "epoch": 9.130612244897959, "grad_norm": 0.1726544052362442, "learning_rate": 5e-07, "loss": 0.0388, "step": 878 }, { "clip_ratio/high_max": 0.0019937539400416426, "clip_ratio/high_mean": 0.0009213013800035696, "clip_ratio/low_mean": 0.0007186429884313839, "clip_ratio/low_min": 4.077625726495171e-05, "clip_ratio/region_mean": 0.0016399443775299005, "epoch": 9.139941690962099, "grad_norm": 0.14601272344589233, "learning_rate": 5e-07, "loss": 0.008, "step": 879 }, { "clip_ratio/high_max": 0.0018745163979474455, "clip_ratio/high_mean": 0.0007275085526998737, "clip_ratio/low_mean": 0.0006626534031966003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013901619640819263, "epoch": 9.14927113702624, "grad_norm": 0.12860162556171417, "learning_rate": 5e-07, "loss": 0.0081, "step": 880 }, { "clip_ratio/high_max": 0.0020943533891113475, "clip_ratio/high_mean": 0.000889690109033836, "clip_ratio/low_mean": 0.0007052454511722317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015949355292832479, "epoch": 9.15860058309038, "grad_norm": 0.14849497377872467, "learning_rate": 5e-07, "loss": -0.0462, "step": 881 }, { "clip_ratio/high_max": 0.001961084763024701, "clip_ratio/high_mean": 0.0007564042152807815, "clip_ratio/low_mean": 0.0007970373208081583, "clip_ratio/low_min": 6.138733715488343e-05, "clip_ratio/region_mean": 0.0015534415360889398, "epoch": 9.167930029154519, "grad_norm": 0.14154089987277985, "learning_rate": 5e-07, "loss": 0.0255, "step": 882 }, { "clip_ratio/high_max": 0.0019217698791180737, "clip_ratio/high_mean": 0.0007700910318817478, "clip_ratio/low_mean": 0.0007188081253843848, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014888991572661325, "epoch": 9.177259475218658, "grad_norm": 0.1456442028284073, "learning_rate": 5e-07, "loss": -0.0177, "step": 883 }, { "clip_ratio/high_max": 0.002209458594734315, "clip_ratio/high_mean": 0.0008096970250335289, "clip_ratio/low_mean": 0.0007799125141900731, "clip_ratio/low_min": 3.702257527038455e-05, "clip_ratio/region_mean": 0.0015896095683274325, "epoch": 9.186588921282798, "grad_norm": 0.1312471330165863, "learning_rate": 5e-07, "loss": -0.0145, "step": 884 }, { "clip_ratio/high_max": 0.002136474948201794, "clip_ratio/high_mean": 0.0008361391774087679, "clip_ratio/low_mean": 0.0006215101111592958, "clip_ratio/low_min": 1.362843431707006e-05, "clip_ratio/region_mean": 0.001457649272197159, "epoch": 9.19591836734694, "grad_norm": 0.12041537463665009, "learning_rate": 5e-07, "loss": -0.0279, "step": 885 }, { "clip_ratio/high_max": 0.0022893831846886314, "clip_ratio/high_mean": 0.000878734568686923, "clip_ratio/low_mean": 0.0006974879961489933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015762225448270328, "epoch": 9.205247813411079, "grad_norm": 0.12647727131843567, "learning_rate": 5e-07, "loss": 0.012, "step": 886 }, { "clip_ratio/high_max": 0.0018773098818201106, "clip_ratio/high_mean": 0.0007097069910741993, "clip_ratio/low_mean": 0.0009023756474562106, "clip_ratio/low_min": 8.114319552987581e-05, "clip_ratio/region_mean": 0.0016120826294354629, "epoch": 9.214577259475218, "grad_norm": 0.13232111930847168, "learning_rate": 5e-07, "loss": 0.0216, "step": 887 }, { "clip_ratio/high_max": 0.0022027276536391582, "clip_ratio/high_mean": 0.0009485517748544225, "clip_ratio/low_mean": 0.0007195874823082704, "clip_ratio/low_min": 3.0369290470844135e-05, "clip_ratio/region_mean": 0.0016681392735335976, "epoch": 9.223906705539358, "grad_norm": 0.138541117310524, "learning_rate": 5e-07, "loss": -0.0077, "step": 888 }, { "clip_ratio/high_max": 0.0021457075999933295, "clip_ratio/high_mean": 0.0008888232678145869, "clip_ratio/low_mean": 0.0007444947495969245, "clip_ratio/low_min": 2.7202183446206618e-05, "clip_ratio/region_mean": 0.0016333180137735326, "epoch": 9.2332361516035, "grad_norm": 0.15563063323497772, "learning_rate": 5e-07, "loss": -0.0321, "step": 889 }, { "clip_ratio/high_max": 0.0018073712053592317, "clip_ratio/high_mean": 0.0007847326342016459, "clip_ratio/low_mean": 0.000813609107353841, "clip_ratio/low_min": 5.45513203178416e-05, "clip_ratio/region_mean": 0.0015983417069946881, "epoch": 9.242565597667639, "grad_norm": 0.30303457379341125, "learning_rate": 5e-07, "loss": 0.0098, "step": 890 }, { "clip_ratio/high_max": 0.0021340306266210973, "clip_ratio/high_mean": 0.0008950051742431242, "clip_ratio/low_mean": 0.0007977985751494998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016928038021433167, "epoch": 9.251895043731778, "grad_norm": 0.12827488780021667, "learning_rate": 5e-07, "loss": -0.0282, "step": 891 }, { "clip_ratio/high_max": 0.002064979271381162, "clip_ratio/high_mean": 0.0008316371277032886, "clip_ratio/low_mean": 0.0008728579687158344, "clip_ratio/low_min": 8.686340697749984e-05, "clip_ratio/region_mean": 0.0017044950800482184, "epoch": 9.261224489795918, "grad_norm": 0.14296743273735046, "learning_rate": 5e-07, "loss": 0.0298, "step": 892 }, { "clip_ratio/high_max": 0.0020762808781000786, "clip_ratio/high_mean": 0.0008992056700662943, "clip_ratio/low_mean": 0.0008720594814803917, "clip_ratio/low_min": 8.746061939746141e-05, "clip_ratio/region_mean": 0.0017712651460897177, "epoch": 9.270553935860057, "grad_norm": 0.15919671952724457, "learning_rate": 5e-07, "loss": -0.0021, "step": 893 }, { "clip_ratio/high_max": 0.002097555421642028, "clip_ratio/high_mean": 0.000873241182489437, "clip_ratio/low_mean": 0.0009208782885252731, "clip_ratio/low_min": 6.450590080930851e-05, "clip_ratio/region_mean": 0.0017941195219464134, "epoch": 9.279883381924199, "grad_norm": 0.18690970540046692, "learning_rate": 5e-07, "loss": -0.0027, "step": 894 }, { "clip_ratio/high_max": 0.002126875115209259, "clip_ratio/high_mean": 0.0008998936937132385, "clip_ratio/low_mean": 0.0007544086038251407, "clip_ratio/low_min": 4.546934815152781e-05, "clip_ratio/region_mean": 0.0016543022866244428, "epoch": 9.289212827988338, "grad_norm": 3.0126302242279053, "learning_rate": 5e-07, "loss": -0.025, "step": 895 }, { "clip_ratio/high_max": 0.001895465607958613, "clip_ratio/high_mean": 0.0007543589945271378, "clip_ratio/low_mean": 0.0007968300778884441, "clip_ratio/low_min": 5.329045325197512e-05, "clip_ratio/region_mean": 0.0015511890524066985, "epoch": 9.298542274052478, "grad_norm": 0.19741466641426086, "learning_rate": 5e-07, "loss": -0.015, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0369349888392857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 697.8496704101562, "completions/mean_terminated_length": 567.5254516601562, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 9.307871720116617, "grad_norm": 0.16500337421894073, "learning_rate": 5e-07, "loss": 0.0045, "num_tokens": 545692495.0, "reward": 0.6212332844734192, "reward_std": 0.1661071628332138, "rewards/simpleverify_reward/mean": 0.6212332844734192, "rewards/simpleverify_reward/std": 0.48508837819099426, "step": 897 }, { "clip_ratio/high_max": 0.00208497356652515, "clip_ratio/high_mean": 0.0007419041949106031, "clip_ratio/low_mean": 0.000527104746652185, "clip_ratio/low_min": 1.2770739886036608e-05, "clip_ratio/region_mean": 0.0012690089497482404, "epoch": 9.317201166180759, "grad_norm": 0.14821922779083252, "learning_rate": 5e-07, "loss": 0.0206, "step": 898 }, { "clip_ratio/high_max": 0.0017162142103188671, "clip_ratio/high_mean": 0.0007732222857157467, "clip_ratio/low_mean": 0.0005544371124415193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013276593927002978, "epoch": 9.326530612244898, "grad_norm": 0.13075503706932068, "learning_rate": 5e-07, "loss": 0.0068, "step": 899 }, { "clip_ratio/high_max": 0.0017382663590979064, "clip_ratio/high_mean": 0.0006795225153837237, "clip_ratio/low_mean": 0.0005427398737083422, "clip_ratio/low_min": 2.5221952455467544e-05, "clip_ratio/region_mean": 0.0012222624063724652, "epoch": 9.335860058309038, "grad_norm": 0.1328532099723816, "learning_rate": 5e-07, "loss": -0.0299, "step": 900 }, { "clip_ratio/high_max": 0.0018321468778594863, "clip_ratio/high_mean": 0.0007033621914160904, "clip_ratio/low_mean": 0.0005498392438312294, "clip_ratio/low_min": 4.7123701733653434e-05, "clip_ratio/region_mean": 0.0012532014297903515, "epoch": 9.345189504373177, "grad_norm": 0.12325987219810486, "learning_rate": 5e-07, "loss": 0.0141, "step": 901 }, { "clip_ratio/high_max": 0.0023787885656929575, "clip_ratio/high_mean": 0.0008683340765855974, "clip_ratio/low_mean": 0.0006161248365970096, "clip_ratio/low_min": 2.2506301320390776e-05, "clip_ratio/region_mean": 0.0014844589459244162, "epoch": 9.354518950437317, "grad_norm": 0.1385040432214737, "learning_rate": 5e-07, "loss": -0.0381, "step": 902 }, { "clip_ratio/high_max": 0.0017709983403619844, "clip_ratio/high_mean": 0.0006993462848186027, "clip_ratio/low_mean": 0.0005992174665152561, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012985637367819436, "epoch": 9.363848396501458, "grad_norm": 0.13174234330654144, "learning_rate": 5e-07, "loss": 0.0027, "step": 903 }, { "clip_ratio/high_max": 0.001748946731822798, "clip_ratio/high_mean": 0.0006740967146470211, "clip_ratio/low_mean": 0.000702355242538033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013764519680989906, "epoch": 9.373177842565598, "grad_norm": 0.122036412358284, "learning_rate": 5e-07, "loss": 0.0151, "step": 904 }, { "clip_ratio/high_max": 0.001896878966363147, "clip_ratio/high_mean": 0.0007699415900788154, "clip_ratio/low_mean": 0.0006115311025496339, "clip_ratio/low_min": 8.441625413979637e-05, "clip_ratio/region_mean": 0.001381472702632891, "epoch": 9.382507288629737, "grad_norm": 0.14133530855178833, "learning_rate": 5e-07, "loss": -0.0169, "step": 905 }, { "clip_ratio/high_max": 0.001971877347386908, "clip_ratio/high_mean": 0.0007145216259232257, "clip_ratio/low_mean": 0.0006498169241240248, "clip_ratio/low_min": 5.0542510507511906e-05, "clip_ratio/region_mean": 0.0013643385718751233, "epoch": 9.391836734693877, "grad_norm": 0.14545395970344543, "learning_rate": 5e-07, "loss": -0.0046, "step": 906 }, { "clip_ratio/high_max": 0.0019543000307749026, "clip_ratio/high_mean": 0.0007521685965912184, "clip_ratio/low_mean": 0.0007822984098311281, "clip_ratio/low_min": 4.656623968912754e-05, "clip_ratio/region_mean": 0.0015344670100603253, "epoch": 9.401166180758018, "grad_norm": 0.13991057872772217, "learning_rate": 5e-07, "loss": -0.0011, "step": 907 }, { "clip_ratio/high_max": 0.001686782852630131, "clip_ratio/high_mean": 0.0007010462013568031, "clip_ratio/low_mean": 0.0007054073121253168, "clip_ratio/low_min": 4.448268919077236e-05, "clip_ratio/region_mean": 0.0014064534952922259, "epoch": 9.410495626822158, "grad_norm": 0.13578104972839355, "learning_rate": 5e-07, "loss": 0.0036, "step": 908 }, { "clip_ratio/high_max": 0.002244865958346054, "clip_ratio/high_mean": 0.0008195545669877902, "clip_ratio/low_mean": 0.0005865112907486036, "clip_ratio/low_min": 3.9212507545016706e-05, "clip_ratio/region_mean": 0.0014060658759262878, "epoch": 9.419825072886297, "grad_norm": 0.13060711324214935, "learning_rate": 5e-07, "loss": -0.0418, "step": 909 }, { "clip_ratio/high_max": 0.0019251395569881424, "clip_ratio/high_mean": 0.0007530309212597786, "clip_ratio/low_mean": 0.0007464147129212506, "clip_ratio/low_min": 6.35145406704396e-05, "clip_ratio/region_mean": 0.0014994456214481033, "epoch": 9.429154518950437, "grad_norm": 0.13694122433662415, "learning_rate": 5e-07, "loss": 0.0143, "step": 910 }, { "clip_ratio/high_max": 0.002059784994344227, "clip_ratio/high_mean": 0.0009021068544825539, "clip_ratio/low_mean": 0.0006918013550603064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015939082368277013, "epoch": 9.438483965014576, "grad_norm": 0.1282322257757187, "learning_rate": 5e-07, "loss": -0.0176, "step": 911 }, { "clip_ratio/high_max": 0.0019161122472723946, "clip_ratio/high_mean": 0.0008031593388295732, "clip_ratio/low_mean": 0.0006611137778236298, "clip_ratio/low_min": 2.372816925344523e-05, "clip_ratio/region_mean": 0.0014642730966443196, "epoch": 9.447813411078718, "grad_norm": 0.14036108553409576, "learning_rate": 5e-07, "loss": -0.0109, "step": 912 }, { "clip_ratio/high_max": 0.0022740262502338737, "clip_ratio/high_mean": 0.0008926647424232215, "clip_ratio/low_mean": 0.0006649859369645128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015576506630168296, "epoch": 9.457142857142857, "grad_norm": 0.1474922150373459, "learning_rate": 5e-07, "loss": -0.0597, "step": 913 }, { "clip_ratio/high_max": 0.001958627653948497, "clip_ratio/high_mean": 0.0008059445954131661, "clip_ratio/low_mean": 0.0007782248430885375, "clip_ratio/low_min": 5.484798748511821e-05, "clip_ratio/region_mean": 0.001584169382113032, "epoch": 9.466472303206997, "grad_norm": 0.13583959639072418, "learning_rate": 5e-07, "loss": -0.0173, "step": 914 }, { "clip_ratio/high_max": 0.0019146808663208503, "clip_ratio/high_mean": 0.0007601491488458123, "clip_ratio/low_mean": 0.000814752569567645, "clip_ratio/low_min": 2.5489396648481488e-05, "clip_ratio/region_mean": 0.0015749017147754785, "epoch": 9.475801749271136, "grad_norm": 0.13228367269039154, "learning_rate": 5e-07, "loss": 0.0093, "step": 915 }, { "clip_ratio/high_max": 0.0020074738531548064, "clip_ratio/high_mean": 0.0007799239738233155, "clip_ratio/low_mean": 0.0007505002522520954, "clip_ratio/low_min": 1.551253444631584e-05, "clip_ratio/region_mean": 0.0015304242369893473, "epoch": 9.485131195335278, "grad_norm": 0.121339812874794, "learning_rate": 5e-07, "loss": 0.0009, "step": 916 }, { "clip_ratio/high_max": 0.0022152848359837662, "clip_ratio/high_mean": 0.0008223938111768803, "clip_ratio/low_mean": 0.0007310401797440136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015534340382146183, "epoch": 9.494460641399417, "grad_norm": 0.13264836370944977, "learning_rate": 5e-07, "loss": -0.0459, "step": 917 }, { "clip_ratio/high_max": 0.0020358586334623396, "clip_ratio/high_mean": 0.0008395433869736735, "clip_ratio/low_mean": 0.0006624293750974175, "clip_ratio/low_min": 3.133155132672982e-05, "clip_ratio/region_mean": 0.0015019727652543224, "epoch": 9.503790087463557, "grad_norm": 0.1352965086698532, "learning_rate": 5e-07, "loss": -0.0462, "step": 918 }, { "clip_ratio/high_max": 0.002222477396571776, "clip_ratio/high_mean": 0.0007664322783966782, "clip_ratio/low_mean": 0.0008390557723032543, "clip_ratio/low_min": 1.2410643648763653e-05, "clip_ratio/region_mean": 0.0016054880434239749, "epoch": 9.513119533527696, "grad_norm": 0.13755203783512115, "learning_rate": 5e-07, "loss": 0.0259, "step": 919 }, { "clip_ratio/high_max": 0.0018671491452550981, "clip_ratio/high_mean": 0.0006701407455693698, "clip_ratio/low_mean": 0.0008167190844687866, "clip_ratio/low_min": 0.00011972831180173671, "clip_ratio/region_mean": 0.0014868598264001776, "epoch": 9.522448979591836, "grad_norm": 0.1369713991880417, "learning_rate": 5e-07, "loss": 0.0292, "step": 920 }, { "clip_ratio/high_max": 0.001963796639756765, "clip_ratio/high_mean": 0.0008030694589251652, "clip_ratio/low_mean": 0.0010093374548887368, "clip_ratio/low_min": 9.865684933174634e-05, "clip_ratio/region_mean": 0.0018124069247278385, "epoch": 9.531778425655977, "grad_norm": 0.14614100754261017, "learning_rate": 5e-07, "loss": 0.0484, "step": 921 }, { "clip_ratio/high_max": 0.0019613255208241753, "clip_ratio/high_mean": 0.0007762286222714465, "clip_ratio/low_mean": 0.0006705166269966867, "clip_ratio/low_min": 1.1828160495497286e-05, "clip_ratio/region_mean": 0.0014467451837845147, "epoch": 9.541107871720117, "grad_norm": 0.1409694105386734, "learning_rate": 5e-07, "loss": -0.0297, "step": 922 }, { "clip_ratio/high_max": 0.001761309031280689, "clip_ratio/high_mean": 0.0006989764597165049, "clip_ratio/low_mean": 0.0007945443212520331, "clip_ratio/low_min": 6.886448863951955e-05, "clip_ratio/region_mean": 0.001493520769145107, "epoch": 9.550437317784256, "grad_norm": 0.12027593702077866, "learning_rate": 5e-07, "loss": 0.0118, "step": 923 }, { "clip_ratio/high_max": 0.002177358044718858, "clip_ratio/high_mean": 0.0008972754330898169, "clip_ratio/low_mean": 0.0007214517045213142, "clip_ratio/low_min": 1.9254466678830795e-05, "clip_ratio/region_mean": 0.0016187271467060782, "epoch": 9.559766763848396, "grad_norm": 0.14146292209625244, "learning_rate": 5e-07, "loss": -0.0532, "step": 924 }, { "clip_ratio/high_max": 0.0017654850307735614, "clip_ratio/high_mean": 0.0006859071827420848, "clip_ratio/low_mean": 0.0007953934527904494, "clip_ratio/low_min": 7.603249287058134e-05, "clip_ratio/region_mean": 0.0014813006309850607, "epoch": 9.569096209912537, "grad_norm": 0.13277766108512878, "learning_rate": 5e-07, "loss": 0.0295, "step": 925 }, { "clip_ratio/high_max": 0.002375846954237204, "clip_ratio/high_mean": 0.0009189938846247969, "clip_ratio/low_mean": 0.0006622727596550249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015812666097190231, "epoch": 9.578425655976677, "grad_norm": 0.1364227831363678, "learning_rate": 5e-07, "loss": -0.023, "step": 926 }, { "clip_ratio/high_max": 0.001753222655679565, "clip_ratio/high_mean": 0.0006964689837332116, "clip_ratio/low_mean": 0.0008165313665813301, "clip_ratio/low_min": 3.729784475581255e-05, "clip_ratio/region_mean": 0.001513000333943637, "epoch": 9.587755102040816, "grad_norm": 0.12774112820625305, "learning_rate": 5e-07, "loss": 0.0086, "step": 927 }, { "clip_ratio/high_max": 0.0021121780628163833, "clip_ratio/high_mean": 0.0008473458319713245, "clip_ratio/low_mean": 0.0007533198622695636, "clip_ratio/low_min": 1.2074960977770388e-05, "clip_ratio/region_mean": 0.0016006656987883616, "epoch": 9.597084548104956, "grad_norm": 0.13681043684482574, "learning_rate": 5e-07, "loss": -0.0179, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0353306361607143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 694.4688720703125, "completions/mean_terminated_length": 569.8890991210938, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 9.606413994169095, "grad_norm": 0.14943301677703857, "learning_rate": 5e-07, "loss": 0.0061, "num_tokens": 564291505.0, "reward": 0.62158203125, "reward_std": 0.16962604224681854, "rewards/simpleverify_reward/mean": 0.62158203125, "rewards/simpleverify_reward/std": 0.4850010573863983, "step": 929 }, { "clip_ratio/high_max": 0.0020585054953699, "clip_ratio/high_mean": 0.0008490477503073635, "clip_ratio/low_mean": 0.00046277349019874237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013118212300469168, "epoch": 9.615743440233237, "grad_norm": 0.1449139267206192, "learning_rate": 5e-07, "loss": -0.0427, "step": 930 }, { "clip_ratio/high_max": 0.0018555475435277913, "clip_ratio/high_mean": 0.0007545525168097811, "clip_ratio/low_mean": 0.0004450509040907491, "clip_ratio/low_min": 2.0418163330759853e-05, "clip_ratio/region_mean": 0.0011996034409094136, "epoch": 9.625072886297376, "grad_norm": 0.14341290295124054, "learning_rate": 5e-07, "loss": -0.0551, "step": 931 }, { "clip_ratio/high_max": 0.001923353600432165, "clip_ratio/high_mean": 0.0006351993870339356, "clip_ratio/low_mean": 0.0004939263262713212, "clip_ratio/low_min": 1.5031265320430975e-05, "clip_ratio/region_mean": 0.001129125685110921, "epoch": 9.634402332361516, "grad_norm": 0.12484217435121536, "learning_rate": 5e-07, "loss": 0.0093, "step": 932 }, { "clip_ratio/high_max": 0.002065938839223236, "clip_ratio/high_mean": 0.0008374109202122781, "clip_ratio/low_mean": 0.0005779530674772104, "clip_ratio/low_min": 2.8168475182610564e-05, "clip_ratio/region_mean": 0.001415363989508478, "epoch": 9.643731778425655, "grad_norm": 0.13747797906398773, "learning_rate": 5e-07, "loss": -0.034, "step": 933 }, { "clip_ratio/high_max": 0.002399373392108828, "clip_ratio/high_mean": 0.000916432876692852, "clip_ratio/low_mean": 0.0005016930426791077, "clip_ratio/low_min": 1.8195050870417617e-05, "clip_ratio/region_mean": 0.0014181259466568008, "epoch": 9.653061224489797, "grad_norm": 0.1276339888572693, "learning_rate": 5e-07, "loss": -0.0375, "step": 934 }, { "clip_ratio/high_max": 0.001693681173492223, "clip_ratio/high_mean": 0.0007800841285643401, "clip_ratio/low_mean": 0.0005909547776354884, "clip_ratio/low_min": 1.577486182213761e-05, "clip_ratio/region_mean": 0.0013710388739127666, "epoch": 9.662390670553936, "grad_norm": 0.13311585783958435, "learning_rate": 5e-07, "loss": -0.0176, "step": 935 }, { "clip_ratio/high_max": 0.0019539673448889516, "clip_ratio/high_mean": 0.0007609392760059563, "clip_ratio/low_mean": 0.0004513159724410798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012122552652726881, "epoch": 9.671720116618076, "grad_norm": 0.13482138514518738, "learning_rate": 5e-07, "loss": -0.0124, "step": 936 }, { "clip_ratio/high_max": 0.0017975914015551098, "clip_ratio/high_mean": 0.0006781400807085447, "clip_ratio/low_mean": 0.0005643472904921509, "clip_ratio/low_min": 1.2176114978501573e-05, "clip_ratio/region_mean": 0.0012424873530108016, "epoch": 9.681049562682215, "grad_norm": 0.11173351854085922, "learning_rate": 5e-07, "loss": -0.0211, "step": 937 }, { "clip_ratio/high_max": 0.001955748019099701, "clip_ratio/high_mean": 0.0007745647326373728, "clip_ratio/low_mean": 0.000596387773839524, "clip_ratio/low_min": 1.6855447029229254e-05, "clip_ratio/region_mean": 0.0013709525410376955, "epoch": 9.690379008746355, "grad_norm": 0.14919140934944153, "learning_rate": 5e-07, "loss": -0.0034, "step": 938 }, { "clip_ratio/high_max": 0.0020012957575090695, "clip_ratio/high_mean": 0.0007908103571026004, "clip_ratio/low_mean": 0.0006293119986366946, "clip_ratio/low_min": 3.2160529372049496e-05, "clip_ratio/region_mean": 0.001420122353010811, "epoch": 9.699708454810496, "grad_norm": 0.13003972172737122, "learning_rate": 5e-07, "loss": -0.0015, "step": 939 }, { "clip_ratio/high_max": 0.0017780220550775994, "clip_ratio/high_mean": 0.0006281714590841148, "clip_ratio/low_mean": 0.000554786284169495, "clip_ratio/low_min": 2.488208883733023e-05, "clip_ratio/region_mean": 0.00118295776337618, "epoch": 9.709037900874636, "grad_norm": 0.12353357672691345, "learning_rate": 5e-07, "loss": 0.0068, "step": 940 }, { "clip_ratio/high_max": 0.0017900122584251221, "clip_ratio/high_mean": 0.0007245437282108469, "clip_ratio/low_mean": 0.0005875097140233265, "clip_ratio/low_min": 1.547795909573324e-05, "clip_ratio/region_mean": 0.0013120534786139615, "epoch": 9.718367346938775, "grad_norm": 0.13515785336494446, "learning_rate": 5e-07, "loss": -0.0035, "step": 941 }, { "clip_ratio/high_max": 0.0018726427551882807, "clip_ratio/high_mean": 0.0007911117190815276, "clip_ratio/low_mean": 0.0005066563617219799, "clip_ratio/low_min": 1.0768435458885506e-05, "clip_ratio/region_mean": 0.0012977681108168326, "epoch": 9.727696793002915, "grad_norm": 0.1351008415222168, "learning_rate": 5e-07, "loss": -0.0216, "step": 942 }, { "clip_ratio/high_max": 0.0019059389269386884, "clip_ratio/high_mean": 0.0007631808166479459, "clip_ratio/low_mean": 0.0006725524399371352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014357332584040705, "epoch": 9.737026239067056, "grad_norm": 0.13949613273143768, "learning_rate": 5e-07, "loss": 0.0088, "step": 943 }, { "clip_ratio/high_max": 0.00214333589974558, "clip_ratio/high_mean": 0.0008487924751534592, "clip_ratio/low_mean": 0.0007469043721357593, "clip_ratio/low_min": 1.3227513591118623e-05, "clip_ratio/region_mean": 0.0015956968163663987, "epoch": 9.746355685131196, "grad_norm": 0.1598367840051651, "learning_rate": 5e-07, "loss": 0.0015, "step": 944 }, { "clip_ratio/high_max": 0.002098067579936469, "clip_ratio/high_mean": 0.0007750037548248656, "clip_ratio/low_mean": 0.0006169214902911335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013919252560299356, "epoch": 9.755685131195335, "grad_norm": 0.13326671719551086, "learning_rate": 5e-07, "loss": 0.0097, "step": 945 }, { "clip_ratio/high_max": 0.0017791140162444208, "clip_ratio/high_mean": 0.0006700083649775479, "clip_ratio/low_mean": 0.0005863839760422707, "clip_ratio/low_min": 8.124268788378686e-06, "clip_ratio/region_mean": 0.0012563923592097126, "epoch": 9.765014577259475, "grad_norm": 0.13112953305244446, "learning_rate": 5e-07, "loss": 0.0271, "step": 946 }, { "clip_ratio/high_max": 0.001949566099938238, "clip_ratio/high_mean": 0.0008142179922288051, "clip_ratio/low_mean": 0.0006761260046914686, "clip_ratio/low_min": 4.8966572649078444e-05, "clip_ratio/region_mean": 0.001490343998739263, "epoch": 9.774344023323614, "grad_norm": 0.13427422940731049, "learning_rate": 5e-07, "loss": -0.0063, "step": 947 }, { "clip_ratio/high_max": 0.001998032530536875, "clip_ratio/high_mean": 0.0007756294144201092, "clip_ratio/low_mean": 0.0006736245468346169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014492539594357368, "epoch": 9.783673469387756, "grad_norm": 0.14399856328964233, "learning_rate": 5e-07, "loss": -0.0175, "step": 948 }, { "clip_ratio/high_max": 0.002190260809584288, "clip_ratio/high_mean": 0.0007902279066911433, "clip_ratio/low_mean": 0.0006908125888003269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014810405227763113, "epoch": 9.793002915451895, "grad_norm": 0.13400205969810486, "learning_rate": 5e-07, "loss": -0.0142, "step": 949 }, { "clip_ratio/high_max": 0.0014916533218638506, "clip_ratio/high_mean": 0.0006076128502172651, "clip_ratio/low_mean": 0.0007162695701481425, "clip_ratio/low_min": 5.5635184253333136e-05, "clip_ratio/region_mean": 0.0013238824212749023, "epoch": 9.802332361516035, "grad_norm": 0.1326238363981247, "learning_rate": 5e-07, "loss": 0.0351, "step": 950 }, { "clip_ratio/high_max": 0.002017289900322794, "clip_ratio/high_mean": 0.0007279414239746984, "clip_ratio/low_mean": 0.0007267230466823094, "clip_ratio/low_min": 3.689647746796254e-05, "clip_ratio/region_mean": 0.0014546644924848806, "epoch": 9.811661807580174, "grad_norm": 0.13126307725906372, "learning_rate": 5e-07, "loss": 0.0259, "step": 951 }, { "clip_ratio/high_max": 0.0016870054059836548, "clip_ratio/high_mean": 0.0006524204927700339, "clip_ratio/low_mean": 0.0006396531434802455, "clip_ratio/low_min": 1.584685560374055e-05, "clip_ratio/region_mean": 0.0012920736407977529, "epoch": 9.820991253644316, "grad_norm": 0.1347253918647766, "learning_rate": 5e-07, "loss": -0.0215, "step": 952 }, { "clip_ratio/high_max": 0.002118910000717733, "clip_ratio/high_mean": 0.0007622050543432124, "clip_ratio/low_mean": 0.0006829552125964256, "clip_ratio/low_min": 1.5703517419751734e-05, "clip_ratio/region_mean": 0.001445160265575396, "epoch": 9.830320699708455, "grad_norm": 0.1360371708869934, "learning_rate": 5e-07, "loss": 0.0017, "step": 953 }, { "clip_ratio/high_max": 0.0019871636905008927, "clip_ratio/high_mean": 0.0008062295146373799, "clip_ratio/low_mean": 0.0007188720328485942, "clip_ratio/low_min": 1.2475049516069703e-05, "clip_ratio/region_mean": 0.0015251015975081827, "epoch": 9.839650145772595, "grad_norm": 0.14313144981861115, "learning_rate": 5e-07, "loss": -0.0148, "step": 954 }, { "clip_ratio/high_max": 0.0017480465467087924, "clip_ratio/high_mean": 0.0006947482397663407, "clip_ratio/low_mean": 0.0006134087361715501, "clip_ratio/low_min": 4.611614076566184e-05, "clip_ratio/region_mean": 0.0013081569595669862, "epoch": 9.848979591836734, "grad_norm": 0.13254930078983307, "learning_rate": 5e-07, "loss": 0.0019, "step": 955 }, { "clip_ratio/high_max": 0.0018196956370957196, "clip_ratio/high_mean": 0.0007149697521526832, "clip_ratio/low_mean": 0.0006966499704503804, "clip_ratio/low_min": 2.4492624106642324e-05, "clip_ratio/region_mean": 0.0014116196798568126, "epoch": 9.858309037900874, "grad_norm": 0.134227454662323, "learning_rate": 5e-07, "loss": 0.0408, "step": 956 }, { "clip_ratio/high_max": 0.0018777365621645004, "clip_ratio/high_mean": 0.0008497167127643479, "clip_ratio/low_mean": 0.000648834633466322, "clip_ratio/low_min": 4.801799695997033e-05, "clip_ratio/region_mean": 0.001498551355325617, "epoch": 9.867638483965015, "grad_norm": 0.1397497057914734, "learning_rate": 5e-07, "loss": -0.012, "step": 957 }, { "clip_ratio/high_max": 0.0019242881098762155, "clip_ratio/high_mean": 0.0006846301394034526, "clip_ratio/low_mean": 0.0006960920795791026, "clip_ratio/low_min": 8.346165668626782e-05, "clip_ratio/region_mean": 0.0013807222385366913, "epoch": 9.876967930029155, "grad_norm": 0.13312602043151855, "learning_rate": 5e-07, "loss": -0.013, "step": 958 }, { "clip_ratio/high_max": 0.0022061428608139977, "clip_ratio/high_mean": 0.0008091808376775589, "clip_ratio/low_mean": 0.0007266870470630238, "clip_ratio/low_min": 1.8312335669179447e-05, "clip_ratio/region_mean": 0.001535867868369678, "epoch": 9.886297376093294, "grad_norm": 0.1286960393190384, "learning_rate": 5e-07, "loss": -0.0148, "step": 959 }, { "clip_ratio/high_max": 0.0022292063586064614, "clip_ratio/high_mean": 0.0008799662682577036, "clip_ratio/low_mean": 0.0007677799803786911, "clip_ratio/low_min": 3.7462537875398993e-05, "clip_ratio/region_mean": 0.001647746204980649, "epoch": 9.895626822157434, "grad_norm": 0.1428099423646927, "learning_rate": 5e-07, "loss": -0.0137, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0387137276785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 711.0504760742188, "completions/mean_terminated_length": 574.7289428710938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 10.00932944606414, "grad_norm": 0.1482914537191391, "learning_rate": 5e-07, "loss": -0.0076, "num_tokens": 583038719.0, "reward": 0.6181640625, "reward_std": 0.1676177680492401, "rewards/simpleverify_reward/mean": 0.6181640625, "rewards/simpleverify_reward/std": 0.485845148563385, "step": 961 }, { "clip_ratio/high_max": 0.0014511571534967516, "clip_ratio/high_mean": 0.0006007591291563585, "clip_ratio/low_mean": 0.000583924573220429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011846837151097134, "epoch": 10.018658892128279, "grad_norm": 0.1330353170633316, "learning_rate": 5e-07, "loss": 0.0197, "step": 962 }, { "clip_ratio/high_max": 0.002013041033933405, "clip_ratio/high_mean": 0.0007985793799889507, "clip_ratio/low_mean": 0.0005015481401642319, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013001275438000448, "epoch": 10.02798833819242, "grad_norm": 0.13179592788219452, "learning_rate": 5e-07, "loss": -0.0161, "step": 963 }, { "clip_ratio/high_max": 0.001933584160724422, "clip_ratio/high_mean": 0.0007724281422269996, "clip_ratio/low_mean": 0.0005243070909273229, "clip_ratio/low_min": 2.935896918643266e-05, "clip_ratio/region_mean": 0.0012967352449777536, "epoch": 10.03731778425656, "grad_norm": 0.17235971987247467, "learning_rate": 5e-07, "loss": 0.0052, "step": 964 }, { "clip_ratio/high_max": 0.001984117705433164, "clip_ratio/high_mean": 0.0007646331760042813, "clip_ratio/low_mean": 0.00046315276995301247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001227785942319315, "epoch": 10.0466472303207, "grad_norm": 0.1388254016637802, "learning_rate": 5e-07, "loss": -0.0111, "step": 965 }, { "clip_ratio/high_max": 0.0016178419391508214, "clip_ratio/high_mean": 0.0005963624971627723, "clip_ratio/low_mean": 0.0005812742056150455, "clip_ratio/low_min": 2.6438796339789405e-05, "clip_ratio/region_mean": 0.0011776367136917543, "epoch": 10.055976676384839, "grad_norm": 0.12994271516799927, "learning_rate": 5e-07, "loss": 0.0023, "step": 966 }, { "clip_ratio/high_max": 0.0018985330789291766, "clip_ratio/high_mean": 0.0008029719720070716, "clip_ratio/low_mean": 0.0006150170756882289, "clip_ratio/low_min": 8.215036359615624e-06, "clip_ratio/region_mean": 0.0014179889803926926, "epoch": 10.06530612244898, "grad_norm": 0.14004948735237122, "learning_rate": 5e-07, "loss": 0.0071, "step": 967 }, { "clip_ratio/high_max": 0.0017391503097314853, "clip_ratio/high_mean": 0.0006063000200811075, "clip_ratio/low_mean": 0.0005641503194055986, "clip_ratio/low_min": 1.1690984138112981e-05, "clip_ratio/region_mean": 0.001170450352219632, "epoch": 10.07463556851312, "grad_norm": 0.13857519626617432, "learning_rate": 5e-07, "loss": -0.0121, "step": 968 }, { "clip_ratio/high_max": 0.0016839306299516466, "clip_ratio/high_mean": 0.0007195120542746736, "clip_ratio/low_mean": 0.0005331526326699532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001252664695130079, "epoch": 10.08396501457726, "grad_norm": 0.12864743173122406, "learning_rate": 5e-07, "loss": -0.01, "step": 969 }, { "clip_ratio/high_max": 0.0018581742988317274, "clip_ratio/high_mean": 0.0006772632405045442, "clip_ratio/low_mean": 0.0005133849376761646, "clip_ratio/low_min": 1.5100265954970382e-05, "clip_ratio/region_mean": 0.0011906481631740462, "epoch": 10.093294460641399, "grad_norm": 0.13292279839515686, "learning_rate": 5e-07, "loss": -0.0222, "step": 970 }, { "clip_ratio/high_max": 0.0022837458236608654, "clip_ratio/high_mean": 0.0008355511290574213, "clip_ratio/low_mean": 0.0006205639656400308, "clip_ratio/low_min": 1.3876554476155434e-05, "clip_ratio/region_mean": 0.0014561151037923992, "epoch": 10.102623906705539, "grad_norm": 0.13277961313724518, "learning_rate": 5e-07, "loss": -0.0135, "step": 971 }, { "clip_ratio/high_max": 0.0017953918722923845, "clip_ratio/high_mean": 0.0007341441087191924, "clip_ratio/low_mean": 0.0005633859755107551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012975300815014634, "epoch": 10.11195335276968, "grad_norm": 0.1484268307685852, "learning_rate": 5e-07, "loss": -0.0057, "step": 972 }, { "clip_ratio/high_max": 0.002038004560745321, "clip_ratio/high_mean": 0.0007471644203178585, "clip_ratio/low_mean": 0.0006050532410881715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013522176668629982, "epoch": 10.12128279883382, "grad_norm": 0.1441996693611145, "learning_rate": 5e-07, "loss": -0.0042, "step": 973 }, { "clip_ratio/high_max": 0.0019873841520166025, "clip_ratio/high_mean": 0.0007914689422250376, "clip_ratio/low_mean": 0.0006796195557399187, "clip_ratio/low_min": 1.5103914847713895e-05, "clip_ratio/region_mean": 0.0014710885152453557, "epoch": 10.130612244897959, "grad_norm": 0.23575547337532043, "learning_rate": 5e-07, "loss": 0.0018, "step": 974 }, { "clip_ratio/high_max": 0.0017791615391615778, "clip_ratio/high_mean": 0.0006833413845015457, "clip_ratio/low_mean": 0.000719283394573722, "clip_ratio/low_min": 1.8237526091979817e-05, "clip_ratio/region_mean": 0.0014026247881702147, "epoch": 10.139941690962099, "grad_norm": 0.1417093127965927, "learning_rate": 5e-07, "loss": -0.0033, "step": 975 }, { "clip_ratio/high_max": 0.0017980669581447728, "clip_ratio/high_mean": 0.0006768622533854796, "clip_ratio/low_mean": 0.0005711795820388943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012480418190534692, "epoch": 10.14927113702624, "grad_norm": 0.15688735246658325, "learning_rate": 5e-07, "loss": 0.0024, "step": 976 }, { "clip_ratio/high_max": 0.0020923371557728387, "clip_ratio/high_mean": 0.000895838576980168, "clip_ratio/low_mean": 0.0007859704928705469, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016818090516608208, "epoch": 10.15860058309038, "grad_norm": 0.1447150558233261, "learning_rate": 5e-07, "loss": -0.0051, "step": 977 }, { "clip_ratio/high_max": 0.0017986433049372863, "clip_ratio/high_mean": 0.0007415126365231117, "clip_ratio/low_mean": 0.0006522827225126093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013937953626736999, "epoch": 10.167930029154519, "grad_norm": 0.12789848446846008, "learning_rate": 5e-07, "loss": -0.0127, "step": 978 }, { "clip_ratio/high_max": 0.0018022793374257162, "clip_ratio/high_mean": 0.0007300972501980141, "clip_ratio/low_mean": 0.0007883173639129382, "clip_ratio/low_min": 4.240210637362907e-05, "clip_ratio/region_mean": 0.0015184145740931854, "epoch": 10.177259475218658, "grad_norm": 0.1326007843017578, "learning_rate": 5e-07, "loss": -0.0095, "step": 979 }, { "clip_ratio/high_max": 0.0020491593386395834, "clip_ratio/high_mean": 0.0008419340847467538, "clip_ratio/low_mean": 0.0007749363812763477, "clip_ratio/low_min": 4.309109408495715e-05, "clip_ratio/region_mean": 0.0016168704896699637, "epoch": 10.186588921282798, "grad_norm": 0.2029799073934555, "learning_rate": 5e-07, "loss": -0.0075, "step": 980 }, { "clip_ratio/high_max": 0.0017810118515626527, "clip_ratio/high_mean": 0.0007409077134070685, "clip_ratio/low_mean": 0.0005995303517920547, "clip_ratio/low_min": 3.1301466151489876e-05, "clip_ratio/region_mean": 0.0013404380697465967, "epoch": 10.19591836734694, "grad_norm": 0.13170070946216583, "learning_rate": 5e-07, "loss": 0.0081, "step": 981 }, { "clip_ratio/high_max": 0.0019389778972254135, "clip_ratio/high_mean": 0.0007107145665941061, "clip_ratio/low_mean": 0.0007900116170276306, "clip_ratio/low_min": 5.025629980082158e-05, "clip_ratio/region_mean": 0.0015007261572463904, "epoch": 10.205247813411079, "grad_norm": 0.1302778124809265, "learning_rate": 5e-07, "loss": -0.0035, "step": 982 }, { "clip_ratio/high_max": 0.002283992427692283, "clip_ratio/high_mean": 0.0008935151872719871, "clip_ratio/low_mean": 0.0008091219806374284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017026371715473942, "epoch": 10.214577259475218, "grad_norm": 0.14124439656734467, "learning_rate": 5e-07, "loss": 0.0044, "step": 983 }, { "clip_ratio/high_max": 0.001996717204747256, "clip_ratio/high_mean": 0.0007622658740729094, "clip_ratio/low_mean": 0.0007718779870629078, "clip_ratio/low_min": 4.678204186348012e-05, "clip_ratio/region_mean": 0.0015341438629548065, "epoch": 10.223906705539358, "grad_norm": 0.1483250856399536, "learning_rate": 5e-07, "loss": 0.0159, "step": 984 }, { "clip_ratio/high_max": 0.002005677659326466, "clip_ratio/high_mean": 0.0007950480212457478, "clip_ratio/low_mean": 0.0006861439615022391, "clip_ratio/low_min": 2.5357427148264833e-05, "clip_ratio/region_mean": 0.0014811920045758598, "epoch": 10.2332361516035, "grad_norm": 0.11564185470342636, "learning_rate": 5e-07, "loss": -0.031, "step": 985 }, { "clip_ratio/high_max": 0.0017799370507418644, "clip_ratio/high_mean": 0.0008126050524879247, "clip_ratio/low_mean": 0.000667565440380713, "clip_ratio/low_min": 1.6702299035387114e-05, "clip_ratio/region_mean": 0.0014801704892306589, "epoch": 10.242565597667639, "grad_norm": 0.13608692586421967, "learning_rate": 5e-07, "loss": -0.0365, "step": 986 }, { "clip_ratio/high_max": 0.0019887651142198592, "clip_ratio/high_mean": 0.0008119200174405705, "clip_ratio/low_mean": 0.0007435059815179557, "clip_ratio/low_min": 2.050320563284913e-05, "clip_ratio/region_mean": 0.0015554260426142719, "epoch": 10.251895043731778, "grad_norm": 0.12019148468971252, "learning_rate": 5e-07, "loss": -0.0011, "step": 987 }, { "clip_ratio/high_max": 0.0022061859781388193, "clip_ratio/high_mean": 0.0008345423248101724, "clip_ratio/low_mean": 0.0008408105913986219, "clip_ratio/low_min": 2.9875716791138984e-05, "clip_ratio/region_mean": 0.0016753529198467731, "epoch": 10.261224489795918, "grad_norm": 0.15217579901218414, "learning_rate": 5e-07, "loss": 0.0165, "step": 988 }, { "clip_ratio/high_max": 0.002258148648252245, "clip_ratio/high_mean": 0.0009260256447305437, "clip_ratio/low_mean": 0.0006769677474949276, "clip_ratio/low_min": 1.851303386501968e-05, "clip_ratio/region_mean": 0.0016029933831305243, "epoch": 10.270553935860057, "grad_norm": 0.13128134608268738, "learning_rate": 5e-07, "loss": -0.0258, "step": 989 }, { "clip_ratio/high_max": 0.0023626669935765676, "clip_ratio/high_mean": 0.001026653470034944, "clip_ratio/low_mean": 0.0007777493174216943, "clip_ratio/low_min": 3.1175537515082397e-05, "clip_ratio/region_mean": 0.0018044027856376488, "epoch": 10.279883381924199, "grad_norm": 0.13195563852787018, "learning_rate": 5e-07, "loss": -0.0587, "step": 990 }, { "clip_ratio/high_max": 0.0019498035544529557, "clip_ratio/high_mean": 0.0008261242928710999, "clip_ratio/low_mean": 0.000773270057834452, "clip_ratio/low_min": 2.5714873117976822e-05, "clip_ratio/region_mean": 0.0015993943343346473, "epoch": 10.289212827988338, "grad_norm": 0.13425655663013458, "learning_rate": 5e-07, "loss": -0.0255, "step": 991 }, { "clip_ratio/high_max": 0.0019187703328498174, "clip_ratio/high_mean": 0.0008186761051547364, "clip_ratio/low_mean": 0.0008146489526552614, "clip_ratio/low_min": 7.235332850541454e-05, "clip_ratio/region_mean": 0.0016333250496245455, "epoch": 10.298542274052478, "grad_norm": 0.14621105790138245, "learning_rate": 5e-07, "loss": 0.0306, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0389927455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 699.0481567382812, "completions/mean_terminated_length": 561.21728515625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 10.307871720116617, "grad_norm": 0.14883773028850555, "learning_rate": 5e-07, "loss": 0.0191, "num_tokens": 601332644.0, "reward": 0.6280343532562256, "reward_std": 0.1652083396911621, "rewards/simpleverify_reward/mean": 0.6280342936515808, "rewards/simpleverify_reward/std": 0.48333775997161865, "step": 993 }, { "clip_ratio/high_max": 0.0019757249428948853, "clip_ratio/high_mean": 0.0006477999231719878, "clip_ratio/low_mean": 0.0004277320886103553, "clip_ratio/low_min": 2.7406240405980498e-05, "clip_ratio/region_mean": 0.0010755320145108271, "epoch": 10.317201166180759, "grad_norm": 0.1489086002111435, "learning_rate": 5e-07, "loss": 0.0448, "step": 994 }, { "clip_ratio/high_max": 0.001452194897865411, "clip_ratio/high_mean": 0.0006149913833723986, "clip_ratio/low_mean": 0.0005085922202852089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011235836027481128, "epoch": 10.326530612244898, "grad_norm": 0.14431190490722656, "learning_rate": 5e-07, "loss": 0.0237, "step": 995 }, { "clip_ratio/high_max": 0.0018562287841632497, "clip_ratio/high_mean": 0.000695064822139102, "clip_ratio/low_mean": 0.00047537249065499054, "clip_ratio/low_min": 1.3168983969080728e-05, "clip_ratio/region_mean": 0.001170437326436513, "epoch": 10.335860058309038, "grad_norm": 0.13901445269584656, "learning_rate": 5e-07, "loss": -0.009, "step": 996 }, { "clip_ratio/high_max": 0.001671604099101387, "clip_ratio/high_mean": 0.0005722917821913143, "clip_ratio/low_mean": 0.0005149181442902773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010872099483094644, "epoch": 10.345189504373177, "grad_norm": 0.1381191462278366, "learning_rate": 5e-07, "loss": -0.0083, "step": 997 }, { "clip_ratio/high_max": 0.0017770457634469494, "clip_ratio/high_mean": 0.0007165961742430227, "clip_ratio/low_mean": 0.0004829505051020533, "clip_ratio/low_min": 2.7113691430713516e-05, "clip_ratio/region_mean": 0.0011995467029919382, "epoch": 10.354518950437317, "grad_norm": 0.12785251438617706, "learning_rate": 5e-07, "loss": -0.0086, "step": 998 }, { "clip_ratio/high_max": 0.0017718054623401258, "clip_ratio/high_mean": 0.0007282294918695698, "clip_ratio/low_mean": 0.0005373874992073979, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012656170001719147, "epoch": 10.363848396501458, "grad_norm": 0.13126778602600098, "learning_rate": 5e-07, "loss": -0.024, "step": 999 }, { "clip_ratio/high_max": 0.0014432882726396201, "clip_ratio/high_mean": 0.0006599990656468435, "clip_ratio/low_mean": 0.000535233178652561, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011952322638535406, "epoch": 10.373177842565598, "grad_norm": 0.1442474126815796, "learning_rate": 5e-07, "loss": 0.0187, "step": 1000 }, { "clip_ratio/high_max": 0.0014674445337732323, "clip_ratio/high_mean": 0.0006420781082852045, "clip_ratio/low_mean": 0.0006811917101003928, "clip_ratio/low_min": 1.3346145351533778e-05, "clip_ratio/region_mean": 0.0013232698111096397, "epoch": 10.382507288629737, "grad_norm": 0.16292619705200195, "learning_rate": 5e-07, "loss": 0.0233, "step": 1001 }, { "clip_ratio/high_max": 0.001945061048900243, "clip_ratio/high_mean": 0.0007748352845737827, "clip_ratio/low_mean": 0.0005475741381815169, "clip_ratio/low_min": 1.594794593984261e-05, "clip_ratio/region_mean": 0.001322409418207826, "epoch": 10.391836734693877, "grad_norm": 0.12913285195827484, "learning_rate": 5e-07, "loss": -0.0276, "step": 1002 }, { "clip_ratio/high_max": 0.001820303932618117, "clip_ratio/high_mean": 0.0006718960357829928, "clip_ratio/low_mean": 0.0005284491567181249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012003451956843492, "epoch": 10.401166180758018, "grad_norm": 0.13680559396743774, "learning_rate": 5e-07, "loss": 0.0028, "step": 1003 }, { "clip_ratio/high_max": 0.0016108590534713585, "clip_ratio/high_mean": 0.0006828634323028382, "clip_ratio/low_mean": 0.0005203466062084772, "clip_ratio/low_min": 1.5326140783145092e-05, "clip_ratio/region_mean": 0.0012032100166834425, "epoch": 10.410495626822158, "grad_norm": 0.1289168894290924, "learning_rate": 5e-07, "loss": -0.0036, "step": 1004 }, { "clip_ratio/high_max": 0.0020242250411683926, "clip_ratio/high_mean": 0.0007621741337970889, "clip_ratio/low_mean": 0.000506460517499363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012686346817645244, "epoch": 10.419825072886297, "grad_norm": 0.1206679567694664, "learning_rate": 5e-07, "loss": -0.0195, "step": 1005 }, { "clip_ratio/high_max": 0.0020017462375108153, "clip_ratio/high_mean": 0.0007575316321890568, "clip_ratio/low_mean": 0.0005393446444941219, "clip_ratio/low_min": 2.2776968762627803e-05, "clip_ratio/region_mean": 0.00129687624212238, "epoch": 10.429154518950437, "grad_norm": 0.15056654810905457, "learning_rate": 5e-07, "loss": -0.0601, "step": 1006 }, { "clip_ratio/high_max": 0.0016944548988249153, "clip_ratio/high_mean": 0.000735917225938465, "clip_ratio/low_mean": 0.0005318847306625685, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012678019193117507, "epoch": 10.438483965014576, "grad_norm": 0.13715694844722748, "learning_rate": 5e-07, "loss": -0.0507, "step": 1007 }, { "clip_ratio/high_max": 0.0018956651038024575, "clip_ratio/high_mean": 0.0007972155217430554, "clip_ratio/low_mean": 0.0006046693069947651, "clip_ratio/low_min": 1.4357913642015774e-05, "clip_ratio/region_mean": 0.0014018848451087251, "epoch": 10.447813411078718, "grad_norm": 0.14530053734779358, "learning_rate": 5e-07, "loss": -0.0492, "step": 1008 }, { "clip_ratio/high_max": 0.0018654814302863088, "clip_ratio/high_mean": 0.0007772675235173665, "clip_ratio/low_mean": 0.0007229707080114167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015002382642705925, "epoch": 10.457142857142857, "grad_norm": 0.13870126008987427, "learning_rate": 5e-07, "loss": -0.0183, "step": 1009 }, { "clip_ratio/high_max": 0.001725452228129143, "clip_ratio/high_mean": 0.0006678644158455427, "clip_ratio/low_mean": 0.0005916816371609457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012595460466400255, "epoch": 10.466472303206997, "grad_norm": 0.12722940742969513, "learning_rate": 5e-07, "loss": -0.022, "step": 1010 }, { "clip_ratio/high_max": 0.0021918176953477086, "clip_ratio/high_mean": 0.0009209420040861005, "clip_ratio/low_mean": 0.0007396966611850075, "clip_ratio/low_min": 6.200150255608605e-05, "clip_ratio/region_mean": 0.0016606386707280762, "epoch": 10.475801749271136, "grad_norm": 0.15431827306747437, "learning_rate": 5e-07, "loss": 0.0141, "step": 1011 }, { "clip_ratio/high_max": 0.0020237446442479268, "clip_ratio/high_mean": 0.0007353658093052218, "clip_ratio/low_mean": 0.0006408685530914227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013762343623966444, "epoch": 10.485131195335278, "grad_norm": 0.1253776103258133, "learning_rate": 5e-07, "loss": -0.0146, "step": 1012 }, { "clip_ratio/high_max": 0.0016251952438324224, "clip_ratio/high_mean": 0.0006564857321791351, "clip_ratio/low_mean": 0.000810947579338972, "clip_ratio/low_min": 1.5281173546100035e-05, "clip_ratio/region_mean": 0.0014674333142465912, "epoch": 10.494460641399417, "grad_norm": 0.14376436173915863, "learning_rate": 5e-07, "loss": 0.0235, "step": 1013 }, { "clip_ratio/high_max": 0.0024046567268669605, "clip_ratio/high_mean": 0.0008111114057101076, "clip_ratio/low_mean": 0.0007283444265340222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015394558140542358, "epoch": 10.503790087463557, "grad_norm": 0.15170089900493622, "learning_rate": 5e-07, "loss": -0.029, "step": 1014 }, { "clip_ratio/high_max": 0.0021907896734774113, "clip_ratio/high_mean": 0.0008475391859974479, "clip_ratio/low_mean": 0.0007618425188411493, "clip_ratio/low_min": 4.003351568826474e-05, "clip_ratio/region_mean": 0.0016093817248474807, "epoch": 10.513119533527696, "grad_norm": 0.16130492091178894, "learning_rate": 5e-07, "loss": -0.0084, "step": 1015 }, { "clip_ratio/high_max": 0.0017721356780384667, "clip_ratio/high_mean": 0.0007767589249851881, "clip_ratio/low_mean": 0.0006639303437623312, "clip_ratio/low_min": 1.0634677892085165e-05, "clip_ratio/region_mean": 0.0014406892914848868, "epoch": 10.522448979591836, "grad_norm": 0.15019811689853668, "learning_rate": 5e-07, "loss": -0.0021, "step": 1016 }, { "clip_ratio/high_max": 0.0017382219702994917, "clip_ratio/high_mean": 0.0007489700456062565, "clip_ratio/low_mean": 0.0006065730767659261, "clip_ratio/low_min": 1.6162399333552457e-05, "clip_ratio/region_mean": 0.001355543136014603, "epoch": 10.531778425655977, "grad_norm": 0.1398898810148239, "learning_rate": 5e-07, "loss": 0.0142, "step": 1017 }, { "clip_ratio/high_max": 0.0021992822075844742, "clip_ratio/high_mean": 0.0008846152850310318, "clip_ratio/low_mean": 0.0006956845700187841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015802998605067842, "epoch": 10.541107871720117, "grad_norm": 0.13600106537342072, "learning_rate": 5e-07, "loss": -0.0142, "step": 1018 }, { "clip_ratio/high_max": 0.0016490687703480944, "clip_ratio/high_mean": 0.0006735122597092413, "clip_ratio/low_mean": 0.0006995879102760227, "clip_ratio/low_min": 1.369412802887382e-05, "clip_ratio/region_mean": 0.0013731001963606104, "epoch": 10.550437317784256, "grad_norm": 0.12347526103258133, "learning_rate": 5e-07, "loss": -0.0002, "step": 1019 }, { "clip_ratio/high_max": 0.002375255491642747, "clip_ratio/high_mean": 0.0009420694186701439, "clip_ratio/low_mean": 0.0006784171519029769, "clip_ratio/low_min": 3.9648974052397534e-05, "clip_ratio/region_mean": 0.001620486546016764, "epoch": 10.559766763848396, "grad_norm": 0.14200089871883392, "learning_rate": 5e-07, "loss": -0.0343, "step": 1020 }, { "clip_ratio/high_max": 0.002064399726805277, "clip_ratio/high_mean": 0.0008404677064390853, "clip_ratio/low_mean": 0.0007296084640984191, "clip_ratio/low_min": 1.8108068616129458e-05, "clip_ratio/region_mean": 0.0015700761723564938, "epoch": 10.569096209912537, "grad_norm": 0.11412409693002701, "learning_rate": 5e-07, "loss": -0.0289, "step": 1021 }, { "clip_ratio/high_max": 0.0017391865512763616, "clip_ratio/high_mean": 0.000804563695055549, "clip_ratio/low_mean": 0.0007914602629170986, "clip_ratio/low_min": 4.0498193811799865e-05, "clip_ratio/region_mean": 0.0015960239761625417, "epoch": 10.578425655976677, "grad_norm": 0.14169536530971527, "learning_rate": 5e-07, "loss": -0.0095, "step": 1022 }, { "clip_ratio/high_max": 0.002141435752491816, "clip_ratio/high_mean": 0.0008073508233792381, "clip_ratio/low_mean": 0.0007887564506745548, "clip_ratio/low_min": 3.7918324778729584e-05, "clip_ratio/region_mean": 0.001596107307705097, "epoch": 10.587755102040816, "grad_norm": 0.10858577489852905, "learning_rate": 5e-07, "loss": 0.0048, "step": 1023 }, { "clip_ratio/high_max": 0.0023772287167957984, "clip_ratio/high_mean": 0.0009086141217267141, "clip_ratio/low_mean": 0.000803870858362643, "clip_ratio/low_min": 1.625487675482873e-05, "clip_ratio/region_mean": 0.0017124850273830816, "epoch": 10.597084548104956, "grad_norm": 0.8651822209358215, "learning_rate": 5e-07, "loss": 0.0094, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0403529575892857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 708.0781860351562, "completions/mean_terminated_length": 565.6167602539062, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 10.606413994169095, "grad_norm": 0.14579981565475464, "learning_rate": 5e-07, "loss": -0.031, "num_tokens": 619748021.0, "reward": 0.636056125164032, "reward_std": 0.17029517889022827, "rewards/simpleverify_reward/mean": 0.6360560655593872, "rewards/simpleverify_reward/std": 0.4811411499977112, "step": 1025 }, { "clip_ratio/high_max": 0.0020679934168583713, "clip_ratio/high_mean": 0.0007174483162089018, "clip_ratio/low_mean": 0.0005384724790928885, "clip_ratio/low_min": 1.5060240912134759e-05, "clip_ratio/region_mean": 0.0012559208080347162, "epoch": 10.615743440233237, "grad_norm": 0.1491379588842392, "learning_rate": 5e-07, "loss": 0.0251, "step": 1026 }, { "clip_ratio/high_max": 0.002436525173834525, "clip_ratio/high_mean": 0.0008527357895218302, "clip_ratio/low_mean": 0.0004854498411077657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013381856660998892, "epoch": 10.625072886297376, "grad_norm": 0.1694987714290619, "learning_rate": 5e-07, "loss": -0.0022, "step": 1027 }, { "clip_ratio/high_max": 0.0018024017881543841, "clip_ratio/high_mean": 0.000755387443859945, "clip_ratio/low_mean": 0.0006262064198381267, "clip_ratio/low_min": 2.9941076718387194e-05, "clip_ratio/region_mean": 0.0013815939128107857, "epoch": 10.634402332361516, "grad_norm": 0.1447501927614212, "learning_rate": 5e-07, "loss": -0.0022, "step": 1028 }, { "clip_ratio/high_max": 0.0017274639030802064, "clip_ratio/high_mean": 0.0006637021924689179, "clip_ratio/low_mean": 0.0004614546787706786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00112515687578707, "epoch": 10.643731778425655, "grad_norm": 0.12542028725147247, "learning_rate": 5e-07, "loss": 0.005, "step": 1029 }, { "clip_ratio/high_max": 0.001863049175881315, "clip_ratio/high_mean": 0.0008223540335166035, "clip_ratio/low_mean": 0.0005551724307224504, "clip_ratio/low_min": 1.3799955922877416e-05, "clip_ratio/region_mean": 0.0013775264378637075, "epoch": 10.653061224489797, "grad_norm": 0.1833355873823166, "learning_rate": 5e-07, "loss": 0.0095, "step": 1030 }, { "clip_ratio/high_max": 0.0019724341364053544, "clip_ratio/high_mean": 0.000808511440482107, "clip_ratio/low_mean": 0.0005950926224613795, "clip_ratio/low_min": 3.609586929087527e-05, "clip_ratio/region_mean": 0.0014036040593055077, "epoch": 10.662390670553936, "grad_norm": 0.14000459015369415, "learning_rate": 5e-07, "loss": 0.0029, "step": 1031 }, { "clip_ratio/high_max": 0.001761169911333127, "clip_ratio/high_mean": 0.0007575304261990823, "clip_ratio/low_mean": 0.0005770945135736838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013346249143069144, "epoch": 10.671720116618076, "grad_norm": 0.1429080367088318, "learning_rate": 5e-07, "loss": -0.0186, "step": 1032 }, { "clip_ratio/high_max": 0.0017320218285021838, "clip_ratio/high_mean": 0.000708829049472115, "clip_ratio/low_mean": 0.000643640289126779, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013524693422368728, "epoch": 10.681049562682215, "grad_norm": 0.1363886147737503, "learning_rate": 5e-07, "loss": 0.0331, "step": 1033 }, { "clip_ratio/high_max": 0.0018978336192958523, "clip_ratio/high_mean": 0.0007340164956985973, "clip_ratio/low_mean": 0.0005862757179784239, "clip_ratio/low_min": 4.31308781116968e-05, "clip_ratio/region_mean": 0.0013202922527852934, "epoch": 10.690379008746355, "grad_norm": 0.1250576227903366, "learning_rate": 5e-07, "loss": 0.0067, "step": 1034 }, { "clip_ratio/high_max": 0.0020643000934796873, "clip_ratio/high_mean": 0.0007368618134933058, "clip_ratio/low_mean": 0.0005645763831125805, "clip_ratio/low_min": 2.6899075237452053e-05, "clip_ratio/region_mean": 0.0013014381875109393, "epoch": 10.699708454810496, "grad_norm": 0.1328788846731186, "learning_rate": 5e-07, "loss": -0.0021, "step": 1035 }, { "clip_ratio/high_max": 0.0019925524247810245, "clip_ratio/high_mean": 0.0007828689340385608, "clip_ratio/low_mean": 0.0006042015322691441, "clip_ratio/low_min": 3.509347516228445e-05, "clip_ratio/region_mean": 0.0013870704533474054, "epoch": 10.709037900874636, "grad_norm": 0.15645809471607208, "learning_rate": 5e-07, "loss": -0.043, "step": 1036 }, { "clip_ratio/high_max": 0.0020377580513013527, "clip_ratio/high_mean": 0.0007965609038365074, "clip_ratio/low_mean": 0.0005065286618446407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013030895606789272, "epoch": 10.718367346938775, "grad_norm": 0.1402151882648468, "learning_rate": 5e-07, "loss": -0.021, "step": 1037 }, { "clip_ratio/high_max": 0.0025048101306310855, "clip_ratio/high_mean": 0.000938799657888012, "clip_ratio/low_mean": 0.0005270031088002725, "clip_ratio/low_min": 2.671510992513504e-05, "clip_ratio/region_mean": 0.001465802786697168, "epoch": 10.727696793002915, "grad_norm": 0.12550733983516693, "learning_rate": 5e-07, "loss": -0.0296, "step": 1038 }, { "clip_ratio/high_max": 0.0019512096187099814, "clip_ratio/high_mean": 0.0008388826145164785, "clip_ratio/low_mean": 0.000650782089905988, "clip_ratio/low_min": 5.469183361128671e-05, "clip_ratio/region_mean": 0.0014896647080604453, "epoch": 10.737026239067056, "grad_norm": 0.14326675236225128, "learning_rate": 5e-07, "loss": 0.0089, "step": 1039 }, { "clip_ratio/high_max": 0.002283817932038801, "clip_ratio/high_mean": 0.0008505298610543832, "clip_ratio/low_mean": 0.0005846665744684287, "clip_ratio/low_min": 1.2457643606467173e-05, "clip_ratio/region_mean": 0.0014351964346133173, "epoch": 10.746355685131196, "grad_norm": 0.22832846641540527, "learning_rate": 5e-07, "loss": -0.0253, "step": 1040 }, { "clip_ratio/high_max": 0.0018367011289228685, "clip_ratio/high_mean": 0.000861708616866963, "clip_ratio/low_mean": 0.0006039875124770333, "clip_ratio/low_min": 4.220244591124356e-05, "clip_ratio/region_mean": 0.001465696084778756, "epoch": 10.755685131195335, "grad_norm": 0.14271403849124908, "learning_rate": 5e-07, "loss": -0.0547, "step": 1041 }, { "clip_ratio/high_max": 0.001894215376523789, "clip_ratio/high_mean": 0.0008228187816712307, "clip_ratio/low_mean": 0.0005685680571332341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001391386835166486, "epoch": 10.765014577259475, "grad_norm": 0.1687183976173401, "learning_rate": 5e-07, "loss": -0.0446, "step": 1042 }, { "clip_ratio/high_max": 0.002098751563607948, "clip_ratio/high_mean": 0.0007585975999973016, "clip_ratio/low_mean": 0.0006321743367152521, "clip_ratio/low_min": 3.302509867353365e-05, "clip_ratio/region_mean": 0.0013907719621784054, "epoch": 10.774344023323614, "grad_norm": 0.13925650715827942, "learning_rate": 5e-07, "loss": 0.0077, "step": 1043 }, { "clip_ratio/high_max": 0.0017967020903597586, "clip_ratio/high_mean": 0.0007380446277238661, "clip_ratio/low_mean": 0.0007036207643977832, "clip_ratio/low_min": 1.3938447409600485e-05, "clip_ratio/region_mean": 0.00144166539030266, "epoch": 10.783673469387756, "grad_norm": 0.14606180787086487, "learning_rate": 5e-07, "loss": -0.0093, "step": 1044 }, { "clip_ratio/high_max": 0.0020957278829882853, "clip_ratio/high_mean": 0.000863166027556872, "clip_ratio/low_mean": 0.0006713048496749252, "clip_ratio/low_min": 2.3686588065174874e-05, "clip_ratio/region_mean": 0.001534470880869776, "epoch": 10.793002915451895, "grad_norm": 0.1352251172065735, "learning_rate": 5e-07, "loss": -0.0619, "step": 1045 }, { "clip_ratio/high_max": 0.0022570568035007454, "clip_ratio/high_mean": 0.0009316149407823104, "clip_ratio/low_mean": 0.000604257031227462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015358719901996665, "epoch": 10.802332361516035, "grad_norm": 0.13093852996826172, "learning_rate": 5e-07, "loss": -0.0398, "step": 1046 }, { "clip_ratio/high_max": 0.0020004655452794395, "clip_ratio/high_mean": 0.0007461699397026678, "clip_ratio/low_mean": 0.000791938886322896, "clip_ratio/low_min": 1.270841767109232e-05, "clip_ratio/region_mean": 0.0015381088305730373, "epoch": 10.811661807580174, "grad_norm": 0.14617879688739777, "learning_rate": 5e-07, "loss": 0.015, "step": 1047 }, { "clip_ratio/high_max": 0.0019755582834477536, "clip_ratio/high_mean": 0.0008833798710838892, "clip_ratio/low_mean": 0.0007235607645270647, "clip_ratio/low_min": 2.824712919391459e-05, "clip_ratio/region_mean": 0.0016069406410679221, "epoch": 10.820991253644316, "grad_norm": 0.15364792943000793, "learning_rate": 5e-07, "loss": -0.0117, "step": 1048 }, { "clip_ratio/high_max": 0.0021048339040135033, "clip_ratio/high_mean": 0.0007746496175968787, "clip_ratio/low_mean": 0.0006945629374968121, "clip_ratio/low_min": 2.4688919438631274e-05, "clip_ratio/region_mean": 0.0014692125732835848, "epoch": 10.830320699708455, "grad_norm": 0.14760959148406982, "learning_rate": 5e-07, "loss": 0.0147, "step": 1049 }, { "clip_ratio/high_max": 0.0019199399612261914, "clip_ratio/high_mean": 0.000829808750495431, "clip_ratio/low_mean": 0.0006730412960678223, "clip_ratio/low_min": 2.8656579161179252e-05, "clip_ratio/region_mean": 0.0015028500856715254, "epoch": 10.839650145772595, "grad_norm": 0.150906503200531, "learning_rate": 5e-07, "loss": -0.013, "step": 1050 }, { "clip_ratio/high_max": 0.0021513741157832555, "clip_ratio/high_mean": 0.0007763999929011334, "clip_ratio/low_mean": 0.000768714717196417, "clip_ratio/low_min": 2.529340417822823e-05, "clip_ratio/region_mean": 0.0015451146973646246, "epoch": 10.848979591836734, "grad_norm": 0.14360585808753967, "learning_rate": 5e-07, "loss": 0.0044, "step": 1051 }, { "clip_ratio/high_max": 0.0019986009174317587, "clip_ratio/high_mean": 0.000846270223519241, "clip_ratio/low_mean": 0.0008709453250048682, "clip_ratio/low_min": 4.993265065422747e-05, "clip_ratio/region_mean": 0.0017172155785374343, "epoch": 10.858309037900874, "grad_norm": 0.15588884055614471, "learning_rate": 5e-07, "loss": -0.0255, "step": 1052 }, { "clip_ratio/high_max": 0.002016129255935084, "clip_ratio/high_mean": 0.0008501117954438087, "clip_ratio/low_mean": 0.0008706540488674364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017207658165716566, "epoch": 10.867638483965015, "grad_norm": 0.132215216755867, "learning_rate": 5e-07, "loss": 0.0127, "step": 1053 }, { "clip_ratio/high_max": 0.002310458294232376, "clip_ratio/high_mean": 0.0008964506778283976, "clip_ratio/low_mean": 0.0007537277615483617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016501784702995792, "epoch": 10.876967930029155, "grad_norm": 0.1586279273033142, "learning_rate": 5e-07, "loss": -0.0602, "step": 1054 }, { "clip_ratio/high_max": 0.0019955292736995034, "clip_ratio/high_mean": 0.0008320371216541389, "clip_ratio/low_mean": 0.0006951648319954984, "clip_ratio/low_min": 4.0188644561567344e-05, "clip_ratio/region_mean": 0.0015272019627445843, "epoch": 10.886297376093294, "grad_norm": 0.15763358771800995, "learning_rate": 5e-07, "loss": 0.0333, "step": 1055 }, { "clip_ratio/high_max": 0.001993903650145512, "clip_ratio/high_mean": 0.000709815325535601, "clip_ratio/low_mean": 0.0006698602410324384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001379675555654103, "epoch": 10.895626822157434, "grad_norm": 1.9941198825836182, "learning_rate": 5e-07, "loss": 0.0092, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 715.7525024414062, "completions/mean_terminated_length": 571.1796875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 11.00932944606414, "grad_norm": 0.13095028698444366, "learning_rate": 5e-07, "loss": -0.0297, "num_tokens": 638333307.0, "reward": 0.6288016438484192, "reward_std": 0.16106314957141876, "rewards/simpleverify_reward/mean": 0.6288016438484192, "rewards/simpleverify_reward/std": 0.48313382267951965, "step": 1057 }, { "clip_ratio/high_max": 0.0018877132024499588, "clip_ratio/high_mean": 0.0007322869478230132, "clip_ratio/low_mean": 0.0004984923111805983, "clip_ratio/low_min": 1.07499135992839e-05, "clip_ratio/region_mean": 0.001230779253091896, "epoch": 11.018658892128279, "grad_norm": 0.14413012564182281, "learning_rate": 5e-07, "loss": -0.0431, "step": 1058 }, { "clip_ratio/high_max": 0.0016181234059331473, "clip_ratio/high_mean": 0.0006651324420090532, "clip_ratio/low_mean": 0.0004925880066366517, "clip_ratio/low_min": 1.1617100426519755e-05, "clip_ratio/region_mean": 0.0011577204531931784, "epoch": 11.02798833819242, "grad_norm": 0.12677621841430664, "learning_rate": 5e-07, "loss": -0.0285, "step": 1059 }, { "clip_ratio/high_max": 0.0014384660844370956, "clip_ratio/high_mean": 0.0005757498465754907, "clip_ratio/low_mean": 0.0005199054903641809, "clip_ratio/low_min": 8.96700112207327e-06, "clip_ratio/region_mean": 0.0010956553378491662, "epoch": 11.03731778425656, "grad_norm": 0.1349744200706482, "learning_rate": 5e-07, "loss": 0.0067, "step": 1060 }, { "clip_ratio/high_max": 0.002108248016156722, "clip_ratio/high_mean": 0.0007986619411894935, "clip_ratio/low_mean": 0.0005489621362357866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013476240783347748, "epoch": 11.0466472303207, "grad_norm": 0.15082992613315582, "learning_rate": 5e-07, "loss": -0.0453, "step": 1061 }, { "clip_ratio/high_max": 0.0020006125487270765, "clip_ratio/high_mean": 0.0008071483862295281, "clip_ratio/low_mean": 0.000634717995126266, "clip_ratio/low_min": 2.8070962798665278e-05, "clip_ratio/region_mean": 0.0014418663340620697, "epoch": 11.055976676384839, "grad_norm": 0.1335686296224594, "learning_rate": 5e-07, "loss": -0.0116, "step": 1062 }, { "clip_ratio/high_max": 0.002146942417311948, "clip_ratio/high_mean": 0.0007895125472714426, "clip_ratio/low_mean": 0.0005581899586104555, "clip_ratio/low_min": 2.538586522859987e-05, "clip_ratio/region_mean": 0.0013477024986059405, "epoch": 11.06530612244898, "grad_norm": 0.14364776015281677, "learning_rate": 5e-07, "loss": -0.031, "step": 1063 }, { "clip_ratio/high_max": 0.0019369610490684863, "clip_ratio/high_mean": 0.0008237931060648407, "clip_ratio/low_mean": 0.0005728016822104109, "clip_ratio/low_min": 1.4849132639938034e-05, "clip_ratio/region_mean": 0.0013965947910037357, "epoch": 11.07463556851312, "grad_norm": 0.14283835887908936, "learning_rate": 5e-07, "loss": -0.0227, "step": 1064 }, { "clip_ratio/high_max": 0.002371647351537831, "clip_ratio/high_mean": 0.0007782747634337284, "clip_ratio/low_mean": 0.0006207975284269196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001399072276399238, "epoch": 11.08396501457726, "grad_norm": 0.1450689733028412, "learning_rate": 5e-07, "loss": -0.0188, "step": 1065 }, { "clip_ratio/high_max": 0.0017708385857986286, "clip_ratio/high_mean": 0.0006873698785057059, "clip_ratio/low_mean": 0.0004988007003703387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011861705497722141, "epoch": 11.093294460641399, "grad_norm": 0.14244131743907928, "learning_rate": 5e-07, "loss": -0.0445, "step": 1066 }, { "clip_ratio/high_max": 0.0016928910881688353, "clip_ratio/high_mean": 0.000680466338963015, "clip_ratio/low_mean": 0.0006913842516951263, "clip_ratio/low_min": 2.221432441729121e-05, "clip_ratio/region_mean": 0.001371850576106226, "epoch": 11.102623906705539, "grad_norm": 0.1418648213148117, "learning_rate": 5e-07, "loss": -0.0049, "step": 1067 }, { "clip_ratio/high_max": 0.0018911021543317474, "clip_ratio/high_mean": 0.0007939499864733079, "clip_ratio/low_mean": 0.0005748546573158819, "clip_ratio/low_min": 1.4305333024822176e-05, "clip_ratio/region_mean": 0.0013688046637980733, "epoch": 11.11195335276968, "grad_norm": 0.14807382225990295, "learning_rate": 5e-07, "loss": -0.0398, "step": 1068 }, { "clip_ratio/high_max": 0.001617703674128279, "clip_ratio/high_mean": 0.000648130415356718, "clip_ratio/low_mean": 0.0007047386552585522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013528690506063867, "epoch": 11.12128279883382, "grad_norm": 0.1373041272163391, "learning_rate": 5e-07, "loss": 0.0131, "step": 1069 }, { "clip_ratio/high_max": 0.0017960518598556519, "clip_ratio/high_mean": 0.0007700274018134223, "clip_ratio/low_mean": 0.0005683893377863569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013384167286858428, "epoch": 11.130612244897959, "grad_norm": 0.1610577255487442, "learning_rate": 5e-07, "loss": -0.0145, "step": 1070 }, { "clip_ratio/high_max": 0.0016221518380916677, "clip_ratio/high_mean": 0.0007000794666964794, "clip_ratio/low_mean": 0.0005211700563449995, "clip_ratio/low_min": 1.179245282401098e-05, "clip_ratio/region_mean": 0.0012212495275889523, "epoch": 11.139941690962099, "grad_norm": 0.13224004209041595, "learning_rate": 5e-07, "loss": -0.0168, "step": 1071 }, { "clip_ratio/high_max": 0.002033516137089464, "clip_ratio/high_mean": 0.0008373260689040762, "clip_ratio/low_mean": 0.0006977964476391207, "clip_ratio/low_min": 3.0466221687674988e-05, "clip_ratio/region_mean": 0.001535122501081787, "epoch": 11.14927113702624, "grad_norm": 0.1548290252685547, "learning_rate": 5e-07, "loss": 0.0082, "step": 1072 }, { "clip_ratio/high_max": 0.0017715744979796, "clip_ratio/high_mean": 0.0006960564369364874, "clip_ratio/low_mean": 0.0007031295372144086, "clip_ratio/low_min": 3.2885563996387646e-05, "clip_ratio/region_mean": 0.0013991859632369597, "epoch": 11.15860058309038, "grad_norm": 0.14280162751674652, "learning_rate": 5e-07, "loss": 0.015, "step": 1073 }, { "clip_ratio/high_max": 0.0015881775761954486, "clip_ratio/high_mean": 0.000623500295660051, "clip_ratio/low_mean": 0.0008116876415442675, "clip_ratio/low_min": 5.2345767471706495e-05, "clip_ratio/region_mean": 0.0014351879108289722, "epoch": 11.167930029154519, "grad_norm": 0.1344824880361557, "learning_rate": 5e-07, "loss": 0.0405, "step": 1074 }, { "clip_ratio/high_max": 0.0017035088130796794, "clip_ratio/high_mean": 0.0006752304343535798, "clip_ratio/low_mean": 0.0006609625106648309, "clip_ratio/low_min": 1.55899233504897e-05, "clip_ratio/region_mean": 0.0013361929486563895, "epoch": 11.177259475218658, "grad_norm": 0.23762647807598114, "learning_rate": 5e-07, "loss": 0.0085, "step": 1075 }, { "clip_ratio/high_max": 0.001970524896023562, "clip_ratio/high_mean": 0.0007409814552374883, "clip_ratio/low_mean": 0.0005931835821684217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013341650337679312, "epoch": 11.186588921282798, "grad_norm": 0.15169082581996918, "learning_rate": 5e-07, "loss": -0.0088, "step": 1076 }, { "clip_ratio/high_max": 0.0019138674397254363, "clip_ratio/high_mean": 0.0006601982604479417, "clip_ratio/low_mean": 0.0007128174911485985, "clip_ratio/low_min": 2.3377595425699838e-05, "clip_ratio/region_mean": 0.001373015755234519, "epoch": 11.19591836734694, "grad_norm": 0.1360742151737213, "learning_rate": 5e-07, "loss": -0.0064, "step": 1077 }, { "clip_ratio/high_max": 0.0017338958168693352, "clip_ratio/high_mean": 0.0007001731064519845, "clip_ratio/low_mean": 0.0007329794316319749, "clip_ratio/low_min": 0.00011463736063888064, "clip_ratio/region_mean": 0.0014331525308080018, "epoch": 11.205247813411079, "grad_norm": 0.13238470256328583, "learning_rate": 5e-07, "loss": -0.0021, "step": 1078 }, { "clip_ratio/high_max": 0.0021386454391176812, "clip_ratio/high_mean": 0.0008100208342511905, "clip_ratio/low_mean": 0.0007585095863760216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015685303733334877, "epoch": 11.214577259475218, "grad_norm": 0.1287229061126709, "learning_rate": 5e-07, "loss": -0.0138, "step": 1079 }, { "clip_ratio/high_max": 0.0020030125633638818, "clip_ratio/high_mean": 0.0007507017144234851, "clip_ratio/low_mean": 0.0006324435985334276, "clip_ratio/low_min": 1.8100203305948526e-05, "clip_ratio/region_mean": 0.001383145288855303, "epoch": 11.223906705539358, "grad_norm": 0.12339562177658081, "learning_rate": 5e-07, "loss": -0.0154, "step": 1080 }, { "clip_ratio/high_max": 0.0020279114542063326, "clip_ratio/high_mean": 0.0007222749954962637, "clip_ratio/low_mean": 0.0007339597796089947, "clip_ratio/low_min": 2.5688450477900915e-05, "clip_ratio/region_mean": 0.001456234746001428, "epoch": 11.2332361516035, "grad_norm": 0.13426657021045685, "learning_rate": 5e-07, "loss": -0.0046, "step": 1081 }, { "clip_ratio/high_max": 0.002046608322416432, "clip_ratio/high_mean": 0.0007965428176248679, "clip_ratio/low_mean": 0.0006747876150257071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014713304371980485, "epoch": 11.242565597667639, "grad_norm": 0.14457504451274872, "learning_rate": 5e-07, "loss": -0.0074, "step": 1082 }, { "clip_ratio/high_max": 0.002348222413274925, "clip_ratio/high_mean": 0.0008246997567766812, "clip_ratio/low_mean": 0.0006932196147317882, "clip_ratio/low_min": 1.6208507076953538e-05, "clip_ratio/region_mean": 0.0015179194160737097, "epoch": 11.251895043731778, "grad_norm": 0.13436833024024963, "learning_rate": 5e-07, "loss": -0.0039, "step": 1083 }, { "clip_ratio/high_max": 0.0017761897870514076, "clip_ratio/high_mean": 0.0006809708038417739, "clip_ratio/low_mean": 0.0007319807991734706, "clip_ratio/low_min": 4.120313133171294e-05, "clip_ratio/region_mean": 0.001412951671227347, "epoch": 11.261224489795918, "grad_norm": 0.1286071538925171, "learning_rate": 5e-07, "loss": -0.0143, "step": 1084 }, { "clip_ratio/high_max": 0.0018379771172476467, "clip_ratio/high_mean": 0.0006842039874754846, "clip_ratio/low_mean": 0.0006993557003625028, "clip_ratio/low_min": 3.486896002868889e-05, "clip_ratio/region_mean": 0.0013835596946591977, "epoch": 11.270553935860057, "grad_norm": 0.142049640417099, "learning_rate": 5e-07, "loss": 0.0123, "step": 1085 }, { "clip_ratio/high_max": 0.001886083326098742, "clip_ratio/high_mean": 0.0007565348259959137, "clip_ratio/low_mean": 0.0008556454267818481, "clip_ratio/low_min": 2.5410196030861698e-05, "clip_ratio/region_mean": 0.0016121803055284545, "epoch": 11.279883381924199, "grad_norm": 0.14349786937236786, "learning_rate": 5e-07, "loss": -0.0182, "step": 1086 }, { "clip_ratio/high_max": 0.001977834486751817, "clip_ratio/high_mean": 0.0007948572601890191, "clip_ratio/low_mean": 0.0007179690965131158, "clip_ratio/low_min": 1.5439723938470706e-05, "clip_ratio/region_mean": 0.0015128263221413363, "epoch": 11.289212827988338, "grad_norm": 0.14981865882873535, "learning_rate": 5e-07, "loss": -0.0208, "step": 1087 }, { "clip_ratio/high_max": 0.002261653422465315, "clip_ratio/high_mean": 0.0009126145414484199, "clip_ratio/low_mean": 0.0007402022665701224, "clip_ratio/low_min": 3.764117809623713e-05, "clip_ratio/region_mean": 0.0016528168562217616, "epoch": 11.298542274052478, "grad_norm": 0.14399302005767822, "learning_rate": 5e-07, "loss": 0.0007, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0398646763392857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 708.8074951171875, "completions/mean_terminated_length": 568.1717529296875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 11.307871720116617, "grad_norm": 0.15634222328662872, "learning_rate": 5e-07, "loss": 0.0142, "num_tokens": 656799507.0, "reward": 0.6313127875328064, "reward_std": 0.16642574965953827, "rewards/simpleverify_reward/mean": 0.6313127875328064, "rewards/simpleverify_reward/std": 0.48245733976364136, "step": 1089 }, { "clip_ratio/high_max": 0.001770523565937765, "clip_ratio/high_mean": 0.0006763890723959776, "clip_ratio/low_mean": 0.0005799944274258451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012563834970933385, "epoch": 11.317201166180759, "grad_norm": 0.14226634800434113, "learning_rate": 5e-07, "loss": 0.0133, "step": 1090 }, { "clip_ratio/high_max": 0.0019110300709144212, "clip_ratio/high_mean": 0.0007139879908208968, "clip_ratio/low_mean": 0.0005970835436528432, "clip_ratio/low_min": 1.4201317753759213e-05, "clip_ratio/region_mean": 0.001311071522650309, "epoch": 11.326530612244898, "grad_norm": 0.1447312831878662, "learning_rate": 5e-07, "loss": -0.0065, "step": 1091 }, { "clip_ratio/high_max": 0.0017813494778238237, "clip_ratio/high_mean": 0.0007253384992509382, "clip_ratio/low_mean": 0.0004798490772373043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012051875637553167, "epoch": 11.335860058309038, "grad_norm": 0.14886248111724854, "learning_rate": 5e-07, "loss": -0.0018, "step": 1092 }, { "clip_ratio/high_max": 0.002122566605976317, "clip_ratio/high_mean": 0.0007581340705655748, "clip_ratio/low_mean": 0.0005349525872588856, "clip_ratio/low_min": 1.1036552677978761e-05, "clip_ratio/region_mean": 0.0012930866796523333, "epoch": 11.345189504373177, "grad_norm": 0.1429101824760437, "learning_rate": 5e-07, "loss": -0.0191, "step": 1093 }, { "clip_ratio/high_max": 0.0018262463563587517, "clip_ratio/high_mean": 0.0006405950562111684, "clip_ratio/low_mean": 0.0005661522945956676, "clip_ratio/low_min": 1.367315690004034e-05, "clip_ratio/region_mean": 0.0012067473435308784, "epoch": 11.354518950437317, "grad_norm": 0.14199744164943695, "learning_rate": 5e-07, "loss": 0.0173, "step": 1094 }, { "clip_ratio/high_max": 0.0019949258712586015, "clip_ratio/high_mean": 0.0007604832862853073, "clip_ratio/low_mean": 0.0006525100352519075, "clip_ratio/low_min": 8.751209770707646e-05, "clip_ratio/region_mean": 0.0014129933006188367, "epoch": 11.363848396501458, "grad_norm": 0.13963162899017334, "learning_rate": 5e-07, "loss": 0.0166, "step": 1095 }, { "clip_ratio/high_max": 0.0018220013844256755, "clip_ratio/high_mean": 0.0007046148111840012, "clip_ratio/low_mean": 0.0005243567711659125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001228971563250525, "epoch": 11.373177842565598, "grad_norm": 0.143700510263443, "learning_rate": 5e-07, "loss": -0.0429, "step": 1096 }, { "clip_ratio/high_max": 0.0019393429247429594, "clip_ratio/high_mean": 0.0008308457472594455, "clip_ratio/low_mean": 0.000544933103810763, "clip_ratio/low_min": 3.357206878717989e-05, "clip_ratio/region_mean": 0.0013757788474322297, "epoch": 11.382507288629737, "grad_norm": 0.12850846350193024, "learning_rate": 5e-07, "loss": -0.0307, "step": 1097 }, { "clip_ratio/high_max": 0.0018109733864548616, "clip_ratio/high_mean": 0.0007133937228900322, "clip_ratio/low_mean": 0.0006913238248671405, "clip_ratio/low_min": 3.3327407436445355e-05, "clip_ratio/region_mean": 0.0014047175500309095, "epoch": 11.391836734693877, "grad_norm": 0.15098464488983154, "learning_rate": 5e-07, "loss": -0.0064, "step": 1098 }, { "clip_ratio/high_max": 0.0019296212085464504, "clip_ratio/high_mean": 0.0007797264825057937, "clip_ratio/low_mean": 0.0006036392087480635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013833657139912248, "epoch": 11.401166180758018, "grad_norm": 0.13267047703266144, "learning_rate": 5e-07, "loss": -0.0363, "step": 1099 }, { "clip_ratio/high_max": 0.0015801385961822234, "clip_ratio/high_mean": 0.0005933908387305564, "clip_ratio/low_mean": 0.000702895089489175, "clip_ratio/low_min": 8.356404850928811e-05, "clip_ratio/region_mean": 0.001296285914577311, "epoch": 11.410495626822158, "grad_norm": 0.14937469363212585, "learning_rate": 5e-07, "loss": 0.0422, "step": 1100 }, { "clip_ratio/high_max": 0.0016756635377532803, "clip_ratio/high_mean": 0.0006494856024801265, "clip_ratio/low_mean": 0.0006953692918614252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013448549070744775, "epoch": 11.419825072886297, "grad_norm": 0.13652057945728302, "learning_rate": 5e-07, "loss": 0.022, "step": 1101 }, { "clip_ratio/high_max": 0.0018522211030358449, "clip_ratio/high_mean": 0.000769309650422656, "clip_ratio/low_mean": 0.0006529225065605715, "clip_ratio/low_min": 6.849783949292032e-05, "clip_ratio/region_mean": 0.0014222321697161533, "epoch": 11.429154518950437, "grad_norm": 0.14537279307842255, "learning_rate": 5e-07, "loss": -0.018, "step": 1102 }, { "clip_ratio/high_max": 0.0020641495648305863, "clip_ratio/high_mean": 0.0008304093735205242, "clip_ratio/low_mean": 0.0006496873374999268, "clip_ratio/low_min": 1.6983694877126254e-05, "clip_ratio/region_mean": 0.0014800966819166206, "epoch": 11.438483965014576, "grad_norm": 0.14504392445087433, "learning_rate": 5e-07, "loss": -0.0279, "step": 1103 }, { "clip_ratio/high_max": 0.0019302261480333982, "clip_ratio/high_mean": 0.0007737589976386516, "clip_ratio/low_mean": 0.000665729046886554, "clip_ratio/low_min": 3.0648572646896355e-05, "clip_ratio/region_mean": 0.0014394880490726791, "epoch": 11.447813411078718, "grad_norm": 0.12176617234945297, "learning_rate": 5e-07, "loss": -0.0171, "step": 1104 }, { "clip_ratio/high_max": 0.0021547684518736787, "clip_ratio/high_mean": 0.0007951012976263883, "clip_ratio/low_mean": 0.0006698856377624907, "clip_ratio/low_min": 9.394240714755142e-05, "clip_ratio/region_mean": 0.0014649869481218047, "epoch": 11.457142857142857, "grad_norm": 0.15117903053760529, "learning_rate": 5e-07, "loss": -0.0045, "step": 1105 }, { "clip_ratio/high_max": 0.0021588939598586876, "clip_ratio/high_mean": 0.0008443499264103593, "clip_ratio/low_mean": 0.0006181412481964799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014624911345890723, "epoch": 11.466472303206997, "grad_norm": 0.15610629320144653, "learning_rate": 5e-07, "loss": -0.0129, "step": 1106 }, { "clip_ratio/high_max": 0.0017522367306810338, "clip_ratio/high_mean": 0.0006488915587397059, "clip_ratio/low_mean": 0.0006322821081994334, "clip_ratio/low_min": 3.133439804514637e-05, "clip_ratio/region_mean": 0.0012811736705771182, "epoch": 11.475801749271136, "grad_norm": 0.1493423879146576, "learning_rate": 5e-07, "loss": 0.0154, "step": 1107 }, { "clip_ratio/high_max": 0.0018733609686023556, "clip_ratio/high_mean": 0.0007288830438483274, "clip_ratio/low_mean": 0.0006803134001529543, "clip_ratio/low_min": 4.652965799323283e-05, "clip_ratio/region_mean": 0.001409196436725324, "epoch": 11.485131195335278, "grad_norm": 0.1300981193780899, "learning_rate": 5e-07, "loss": -0.0124, "step": 1108 }, { "clip_ratio/high_max": 0.0020505543870967813, "clip_ratio/high_mean": 0.0007179770900620497, "clip_ratio/low_mean": 0.0006853739068901632, "clip_ratio/low_min": 2.3093976778909564e-05, "clip_ratio/region_mean": 0.0014033510233275592, "epoch": 11.494460641399417, "grad_norm": 0.16492915153503418, "learning_rate": 5e-07, "loss": -0.0063, "step": 1109 }, { "clip_ratio/high_max": 0.0020102884045627434, "clip_ratio/high_mean": 0.0007138681176002137, "clip_ratio/low_mean": 0.0007436381947627524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014575063032680191, "epoch": 11.503790087463557, "grad_norm": 0.145339235663414, "learning_rate": 5e-07, "loss": 0.0138, "step": 1110 }, { "clip_ratio/high_max": 0.0020141731001785956, "clip_ratio/high_mean": 0.0008023259915717063, "clip_ratio/low_mean": 0.000617916352894099, "clip_ratio/low_min": 5.082178904558532e-05, "clip_ratio/region_mean": 0.001420242406311445, "epoch": 11.513119533527696, "grad_norm": 0.13361357152462006, "learning_rate": 5e-07, "loss": -0.0567, "step": 1111 }, { "clip_ratio/high_max": 0.002138555617420934, "clip_ratio/high_mean": 0.0008788031882431824, "clip_ratio/low_mean": 0.0006014872606101562, "clip_ratio/low_min": 2.6670422812458128e-05, "clip_ratio/region_mean": 0.001480290447943844, "epoch": 11.522448979591836, "grad_norm": 0.1500701904296875, "learning_rate": 5e-07, "loss": -0.0264, "step": 1112 }, { "clip_ratio/high_max": 0.00199410749701201, "clip_ratio/high_mean": 0.0007815785920683993, "clip_ratio/low_mean": 0.0007335704249271657, "clip_ratio/low_min": 1.359582347504329e-05, "clip_ratio/region_mean": 0.001515149044280406, "epoch": 11.531778425655977, "grad_norm": 0.12330682575702667, "learning_rate": 5e-07, "loss": -0.0377, "step": 1113 }, { "clip_ratio/high_max": 0.002177603149903007, "clip_ratio/high_mean": 0.0008371457315661246, "clip_ratio/low_mean": 0.0007227875776152359, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015599332764395513, "epoch": 11.541107871720117, "grad_norm": 0.1405297964811325, "learning_rate": 5e-07, "loss": -0.0134, "step": 1114 }, { "clip_ratio/high_max": 0.0013534955760405865, "clip_ratio/high_mean": 0.0005922480904700933, "clip_ratio/low_mean": 0.0006880894043206354, "clip_ratio/low_min": 1.2192743270134088e-05, "clip_ratio/region_mean": 0.0012803374993382022, "epoch": 11.550437317784256, "grad_norm": 0.14250461757183075, "learning_rate": 5e-07, "loss": 0.0429, "step": 1115 }, { "clip_ratio/high_max": 0.001675851297477493, "clip_ratio/high_mean": 0.0007066646448947722, "clip_ratio/low_mean": 0.0007171690131144715, "clip_ratio/low_min": 5.0432739953976125e-05, "clip_ratio/region_mean": 0.001423833666194696, "epoch": 11.559766763848396, "grad_norm": 0.13664110004901886, "learning_rate": 5e-07, "loss": -0.0144, "step": 1116 }, { "clip_ratio/high_max": 0.0018616794841364026, "clip_ratio/high_mean": 0.0007572406630060868, "clip_ratio/low_mean": 0.0006734608059559832, "clip_ratio/low_min": 2.2329402781906538e-05, "clip_ratio/region_mean": 0.0014307014498626813, "epoch": 11.569096209912537, "grad_norm": 0.13541001081466675, "learning_rate": 5e-07, "loss": -0.0358, "step": 1117 }, { "clip_ratio/high_max": 0.0019040099941776134, "clip_ratio/high_mean": 0.0007274560330188251, "clip_ratio/low_mean": 0.0006970135727897286, "clip_ratio/low_min": 3.6985652513976675e-05, "clip_ratio/region_mean": 0.0014244695921661332, "epoch": 11.578425655976677, "grad_norm": 0.15037240087985992, "learning_rate": 5e-07, "loss": 0.0033, "step": 1118 }, { "clip_ratio/high_max": 0.001684893184574321, "clip_ratio/high_mean": 0.0007312338348128833, "clip_ratio/low_mean": 0.0006651114354099263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001396345287503209, "epoch": 11.587755102040816, "grad_norm": 0.12710899114608765, "learning_rate": 5e-07, "loss": -0.0232, "step": 1119 }, { "clip_ratio/high_max": 0.0022113530212664045, "clip_ratio/high_mean": 0.0009032179732457735, "clip_ratio/low_mean": 0.0007382751564364298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001641493130591698, "epoch": 11.597084548104956, "grad_norm": 0.14717306196689606, "learning_rate": 5e-07, "loss": -0.0374, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 715.9531860351562, "completions/mean_terminated_length": 570.3631591796875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 11.606413994169095, "grad_norm": 0.15439000725746155, "learning_rate": 5e-07, "loss": -0.0191, "num_tokens": 675361002.0, "reward": 0.6323591470718384, "reward_std": 0.16276735067367554, "rewards/simpleverify_reward/mean": 0.6323590874671936, "rewards/simpleverify_reward/std": 0.48217129707336426, "step": 1121 }, { "clip_ratio/high_max": 0.0017280469328397885, "clip_ratio/high_mean": 0.0006483283905254211, "clip_ratio/low_mean": 0.0004929414299112977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011412698186177295, "epoch": 11.615743440233237, "grad_norm": 0.12546902894973755, "learning_rate": 5e-07, "loss": 0.0326, "step": 1122 }, { "clip_ratio/high_max": 0.001713400659355102, "clip_ratio/high_mean": 0.000650503081487841, "clip_ratio/low_mean": 0.0004559489420898899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001106452036765404, "epoch": 11.625072886297376, "grad_norm": 0.13805554807186127, "learning_rate": 5e-07, "loss": -0.0501, "step": 1123 }, { "clip_ratio/high_max": 0.0019097403965133708, "clip_ratio/high_mean": 0.0007789847086314694, "clip_ratio/low_mean": 0.0005205051611483213, "clip_ratio/low_min": 1.4105167792877182e-05, "clip_ratio/region_mean": 0.0012994898497709073, "epoch": 11.634402332361516, "grad_norm": 0.16848637163639069, "learning_rate": 5e-07, "loss": -0.0389, "step": 1124 }, { "clip_ratio/high_max": 0.0019280151682323776, "clip_ratio/high_mean": 0.0007057116472424241, "clip_ratio/low_mean": 0.0005260915741018835, "clip_ratio/low_min": 2.523161037970567e-05, "clip_ratio/region_mean": 0.001231803234986728, "epoch": 11.643731778425655, "grad_norm": 0.14436425268650055, "learning_rate": 5e-07, "loss": -0.0193, "step": 1125 }, { "clip_ratio/high_max": 0.0016260028187389253, "clip_ratio/high_mean": 0.0006624483903578948, "clip_ratio/low_mean": 0.0005043463061156217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001166794689197559, "epoch": 11.653061224489797, "grad_norm": 0.15829679369926453, "learning_rate": 5e-07, "loss": -0.002, "step": 1126 }, { "clip_ratio/high_max": 0.001756297097017523, "clip_ratio/high_mean": 0.0006722820789946127, "clip_ratio/low_mean": 0.0005661621662511607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012384442452457733, "epoch": 11.662390670553936, "grad_norm": 0.1481861025094986, "learning_rate": 5e-07, "loss": -0.0242, "step": 1127 }, { "clip_ratio/high_max": 0.001995010032260325, "clip_ratio/high_mean": 0.000874515957548283, "clip_ratio/low_mean": 0.0005035840113123413, "clip_ratio/low_min": 1.8232205547974445e-05, "clip_ratio/region_mean": 0.001378099957946688, "epoch": 11.671720116618076, "grad_norm": 0.15008923411369324, "learning_rate": 5e-07, "loss": -0.0258, "step": 1128 }, { "clip_ratio/high_max": 0.001484676333348034, "clip_ratio/high_mean": 0.00064892568752839, "clip_ratio/low_mean": 0.0005010028926335508, "clip_ratio/low_min": 2.5253024432458915e-05, "clip_ratio/region_mean": 0.0011499285828904249, "epoch": 11.681049562682215, "grad_norm": 0.1368827372789383, "learning_rate": 5e-07, "loss": -0.0016, "step": 1129 }, { "clip_ratio/high_max": 0.0019543583184713498, "clip_ratio/high_mean": 0.0007895728540461278, "clip_ratio/low_mean": 0.000576434835238615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013660077020176686, "epoch": 11.690379008746355, "grad_norm": 0.15477852523326874, "learning_rate": 5e-07, "loss": -0.0059, "step": 1130 }, { "clip_ratio/high_max": 0.0018138765371986665, "clip_ratio/high_mean": 0.0006591311994270654, "clip_ratio/low_mean": 0.0005801215547762695, "clip_ratio/low_min": 1.1640901902865153e-05, "clip_ratio/region_mean": 0.0012392527532938402, "epoch": 11.699708454810496, "grad_norm": 0.13867685198783875, "learning_rate": 5e-07, "loss": -0.0147, "step": 1131 }, { "clip_ratio/high_max": 0.0015976383328961674, "clip_ratio/high_mean": 0.0006770387844881043, "clip_ratio/low_mean": 0.0005526787999770022, "clip_ratio/low_min": 3.598157854867168e-05, "clip_ratio/region_mean": 0.0012297175562707707, "epoch": 11.709037900874636, "grad_norm": 0.15172003209590912, "learning_rate": 5e-07, "loss": 0.0275, "step": 1132 }, { "clip_ratio/high_max": 0.0020003295139758848, "clip_ratio/high_mean": 0.0007213966000563232, "clip_ratio/low_mean": 0.0006777639164283755, "clip_ratio/low_min": 2.7667108952300623e-05, "clip_ratio/region_mean": 0.0013991605483170133, "epoch": 11.718367346938775, "grad_norm": 0.13101805746555328, "learning_rate": 5e-07, "loss": 0.0276, "step": 1133 }, { "clip_ratio/high_max": 0.001819396438804688, "clip_ratio/high_mean": 0.0007351693866439746, "clip_ratio/low_mean": 0.0005961076412859256, "clip_ratio/low_min": 3.2460781767440494e-05, "clip_ratio/region_mean": 0.001331277049757773, "epoch": 11.727696793002915, "grad_norm": 0.1560979187488556, "learning_rate": 5e-07, "loss": -0.0239, "step": 1134 }, { "clip_ratio/high_max": 0.0018541679746704176, "clip_ratio/high_mean": 0.0007912628097983543, "clip_ratio/low_mean": 0.0005570431267187814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001348305944702588, "epoch": 11.737026239067056, "grad_norm": 0.1343592405319214, "learning_rate": 5e-07, "loss": -0.0433, "step": 1135 }, { "clip_ratio/high_max": 0.0015849280862312298, "clip_ratio/high_mean": 0.0007153544829634484, "clip_ratio/low_mean": 0.0005758857078035362, "clip_ratio/low_min": 4.998129224986769e-05, "clip_ratio/region_mean": 0.0012912401580251753, "epoch": 11.746355685131196, "grad_norm": 0.13640418648719788, "learning_rate": 5e-07, "loss": -0.0156, "step": 1136 }, { "clip_ratio/high_max": 0.002011884222156368, "clip_ratio/high_mean": 0.0008057935683609685, "clip_ratio/low_mean": 0.0006435032819354092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014492968512058724, "epoch": 11.755685131195335, "grad_norm": 0.13618139922618866, "learning_rate": 5e-07, "loss": -0.0059, "step": 1137 }, { "clip_ratio/high_max": 0.002212222163507249, "clip_ratio/high_mean": 0.0009513957866147393, "clip_ratio/low_mean": 0.0006173065175971715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001568702318763826, "epoch": 11.765014577259475, "grad_norm": 0.2365909069776535, "learning_rate": 5e-07, "loss": -0.0488, "step": 1138 }, { "clip_ratio/high_max": 0.0019935903037548997, "clip_ratio/high_mean": 0.0008529653696314199, "clip_ratio/low_mean": 0.0005164177293863759, "clip_ratio/low_min": 1.4898689187248237e-05, "clip_ratio/region_mean": 0.001369383080600528, "epoch": 11.774344023323614, "grad_norm": 0.13373157382011414, "learning_rate": 5e-07, "loss": -0.0545, "step": 1139 }, { "clip_ratio/high_max": 0.0016062716567830648, "clip_ratio/high_mean": 0.0007556320533694816, "clip_ratio/low_mean": 0.0006823920193710364, "clip_ratio/low_min": 2.624396438477561e-05, "clip_ratio/region_mean": 0.001438024060917087, "epoch": 11.783673469387756, "grad_norm": 0.14274495840072632, "learning_rate": 5e-07, "loss": 0.0082, "step": 1140 }, { "clip_ratio/high_max": 0.0018511008856876288, "clip_ratio/high_mean": 0.000728546849131817, "clip_ratio/low_mean": 0.0007430540226778248, "clip_ratio/low_min": 1.5703517419751734e-05, "clip_ratio/region_mean": 0.0014716008809045888, "epoch": 11.793002915451895, "grad_norm": 0.14283759891986847, "learning_rate": 5e-07, "loss": 0.0133, "step": 1141 }, { "clip_ratio/high_max": 0.0020393488375702873, "clip_ratio/high_mean": 0.0008029597256609122, "clip_ratio/low_mean": 0.0007024261794867925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015053859006002313, "epoch": 11.802332361516035, "grad_norm": 0.13800179958343506, "learning_rate": 5e-07, "loss": -0.0336, "step": 1142 }, { "clip_ratio/high_max": 0.0020460710766201373, "clip_ratio/high_mean": 0.0008050639335124288, "clip_ratio/low_mean": 0.0007986078890098725, "clip_ratio/low_min": 1.6591451640124433e-05, "clip_ratio/region_mean": 0.0016036718370742165, "epoch": 11.811661807580174, "grad_norm": 0.13966143131256104, "learning_rate": 5e-07, "loss": 0.0062, "step": 1143 }, { "clip_ratio/high_max": 0.002218192908912897, "clip_ratio/high_mean": 0.0007819019629096147, "clip_ratio/low_mean": 0.0008367466634808807, "clip_ratio/low_min": 1.5074770999490283e-05, "clip_ratio/region_mean": 0.0016186486100195907, "epoch": 11.820991253644316, "grad_norm": 0.1518036127090454, "learning_rate": 5e-07, "loss": -0.0055, "step": 1144 }, { "clip_ratio/high_max": 0.002122878657246474, "clip_ratio/high_mean": 0.0007946672640173347, "clip_ratio/low_mean": 0.0007767795159452362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001571446791786002, "epoch": 11.830320699708455, "grad_norm": 0.15152855217456818, "learning_rate": 5e-07, "loss": 0.0047, "step": 1145 }, { "clip_ratio/high_max": 0.0018413540274195839, "clip_ratio/high_mean": 0.000732650780264521, "clip_ratio/low_mean": 0.0006897410839883378, "clip_ratio/low_min": 3.892464155796915e-05, "clip_ratio/region_mean": 0.0014223918624338694, "epoch": 11.839650145772595, "grad_norm": 0.136222705245018, "learning_rate": 5e-07, "loss": -0.0356, "step": 1146 }, { "clip_ratio/high_max": 0.0019619043960119598, "clip_ratio/high_mean": 0.0006927567719685612, "clip_ratio/low_mean": 0.0007193813125923043, "clip_ratio/low_min": 1.565239108458627e-05, "clip_ratio/region_mean": 0.0014121380881988443, "epoch": 11.848979591836734, "grad_norm": 0.1319465935230255, "learning_rate": 5e-07, "loss": 0.0001, "step": 1147 }, { "clip_ratio/high_max": 0.0017042094259522855, "clip_ratio/high_mean": 0.0007568312585135573, "clip_ratio/low_mean": 0.0007996864896995248, "clip_ratio/low_min": 7.284736057044938e-05, "clip_ratio/region_mean": 0.0015565177964163013, "epoch": 11.858309037900874, "grad_norm": 0.14110586047172546, "learning_rate": 5e-07, "loss": 0.0094, "step": 1148 }, { "clip_ratio/high_max": 0.0017169723141705617, "clip_ratio/high_mean": 0.0006164837341202656, "clip_ratio/low_mean": 0.0007304856344489963, "clip_ratio/low_min": 3.137473686365411e-05, "clip_ratio/region_mean": 0.001346969362202799, "epoch": 11.867638483965015, "grad_norm": 0.14766737818717957, "learning_rate": 5e-07, "loss": 0.0222, "step": 1149 }, { "clip_ratio/high_max": 0.0021177501730562653, "clip_ratio/high_mean": 0.0008662855434522498, "clip_ratio/low_mean": 0.0006319713825178042, "clip_ratio/low_min": 7.392660700134002e-05, "clip_ratio/region_mean": 0.0014982569264248013, "epoch": 11.876967930029155, "grad_norm": 0.14850133657455444, "learning_rate": 5e-07, "loss": -0.0413, "step": 1150 }, { "clip_ratio/high_max": 0.001781520990334684, "clip_ratio/high_mean": 0.0007535308436672494, "clip_ratio/low_mean": 0.0006555010713782394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001409031916409731, "epoch": 11.886297376093294, "grad_norm": 0.14338485896587372, "learning_rate": 5e-07, "loss": 0.0124, "step": 1151 }, { "clip_ratio/high_max": 0.0018795743817463517, "clip_ratio/high_mean": 0.0006718214317515958, "clip_ratio/low_mean": 0.0006973199779167771, "clip_ratio/low_min": 1.7265192582271993e-05, "clip_ratio/region_mean": 0.0013691413951164577, "epoch": 11.895626822157434, "grad_norm": 0.12096168845891953, "learning_rate": 5e-07, "loss": -0.0064, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.042236328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 716.9508056640625, "completions/mean_terminated_length": 567.9383544921875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 12.00932944606414, "grad_norm": 0.15644553303718567, "learning_rate": 5e-07, "loss": -0.033, "num_tokens": 693772805.0, "reward": 0.6430315375328064, "reward_std": 0.15560808777809143, "rewards/simpleverify_reward/mean": 0.6430315375328064, "rewards/simpleverify_reward/std": 0.47911375761032104, "step": 1153 }, { "clip_ratio/high_max": 0.001765818422427401, "clip_ratio/high_mean": 0.0007210921676232829, "clip_ratio/low_mean": 0.0005126279338583117, "clip_ratio/low_min": 1.4630149962613359e-05, "clip_ratio/region_mean": 0.0012337200823822059, "epoch": 12.018658892128279, "grad_norm": 0.14651980996131897, "learning_rate": 5e-07, "loss": -0.0048, "step": 1154 }, { "clip_ratio/high_max": 0.0019490003178361803, "clip_ratio/high_mean": 0.000697869651048677, "clip_ratio/low_mean": 0.000481261756249296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011791313590947539, "epoch": 12.02798833819242, "grad_norm": 0.16169001162052155, "learning_rate": 5e-07, "loss": -0.0051, "step": 1155 }, { "clip_ratio/high_max": 0.001921745038998779, "clip_ratio/high_mean": 0.0007893069796409691, "clip_ratio/low_mean": 0.0005578294944825757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013471364982251544, "epoch": 12.03731778425656, "grad_norm": 0.1435176283121109, "learning_rate": 5e-07, "loss": 0.0101, "step": 1156 }, { "clip_ratio/high_max": 0.001993443045648746, "clip_ratio/high_mean": 0.0007663389005756471, "clip_ratio/low_mean": 0.00045851143204345135, "clip_ratio/low_min": 1.6301512005156837e-05, "clip_ratio/region_mean": 0.0012248503080627415, "epoch": 12.0466472303207, "grad_norm": 0.14517326653003693, "learning_rate": 5e-07, "loss": -0.0041, "step": 1157 }, { "clip_ratio/high_max": 0.0018149310344597325, "clip_ratio/high_mean": 0.0008095433840935584, "clip_ratio/low_mean": 0.0005365719630390231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013461153284879401, "epoch": 12.055976676384839, "grad_norm": 0.16151168942451477, "learning_rate": 5e-07, "loss": -0.0595, "step": 1158 }, { "clip_ratio/high_max": 0.0022769263523514383, "clip_ratio/high_mean": 0.0008812323067104444, "clip_ratio/low_mean": 0.00041284392864326946, "clip_ratio/low_min": 1.4198091776052024e-05, "clip_ratio/region_mean": 0.0012940762389916927, "epoch": 12.06530612244898, "grad_norm": 0.16227085888385773, "learning_rate": 5e-07, "loss": -0.0533, "step": 1159 }, { "clip_ratio/high_max": 0.0018318959628231823, "clip_ratio/high_mean": 0.0007442770183843095, "clip_ratio/low_mean": 0.0005530357939278474, "clip_ratio/low_min": 2.076756936730817e-05, "clip_ratio/region_mean": 0.0012973128323210403, "epoch": 12.07463556851312, "grad_norm": 0.1412833333015442, "learning_rate": 5e-07, "loss": 0.002, "step": 1160 }, { "clip_ratio/high_max": 0.0021002662397222593, "clip_ratio/high_mean": 0.0007649529616173822, "clip_ratio/low_mean": 0.000561169983484433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013261229432828259, "epoch": 12.08396501457726, "grad_norm": 0.1450706124305725, "learning_rate": 5e-07, "loss": -0.0394, "step": 1161 }, { "clip_ratio/high_max": 0.0014488963170151692, "clip_ratio/high_mean": 0.0005483238619490294, "clip_ratio/low_mean": 0.0005515621887752786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001099886059819255, "epoch": 12.093294460641399, "grad_norm": 0.1320653259754181, "learning_rate": 5e-07, "loss": 0.04, "step": 1162 }, { "clip_ratio/high_max": 0.0019712394787347876, "clip_ratio/high_mean": 0.0006528644280479057, "clip_ratio/low_mean": 0.0004934440776196425, "clip_ratio/low_min": 1.5903307939879596e-05, "clip_ratio/region_mean": 0.0011463084956631064, "epoch": 12.102623906705539, "grad_norm": 0.1375642567873001, "learning_rate": 5e-07, "loss": 0.0134, "step": 1163 }, { "clip_ratio/high_max": 0.0018397469684714451, "clip_ratio/high_mean": 0.0007329009131353814, "clip_ratio/low_mean": 0.0006557131928275339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013886141096008942, "epoch": 12.11195335276968, "grad_norm": 0.13910770416259766, "learning_rate": 5e-07, "loss": -0.0265, "step": 1164 }, { "clip_ratio/high_max": 0.002043688415142242, "clip_ratio/high_mean": 0.0007131861384550575, "clip_ratio/low_mean": 0.000648986680971575, "clip_ratio/low_min": 4.504296703089494e-05, "clip_ratio/region_mean": 0.0013621728285215795, "epoch": 12.12128279883382, "grad_norm": 0.13208390772342682, "learning_rate": 5e-07, "loss": -0.0058, "step": 1165 }, { "clip_ratio/high_max": 0.0018722332388279028, "clip_ratio/high_mean": 0.0006365714216371998, "clip_ratio/low_mean": 0.0006119118461356265, "clip_ratio/low_min": 3.4147021324315574e-05, "clip_ratio/region_mean": 0.001248483302333625, "epoch": 12.130612244897959, "grad_norm": 0.12912209331989288, "learning_rate": 5e-07, "loss": -0.0174, "step": 1166 }, { "clip_ratio/high_max": 0.001950328798557166, "clip_ratio/high_mean": 0.0008309824916068465, "clip_ratio/low_mean": 0.0006123566899987054, "clip_ratio/low_min": 1.0262725481879897e-05, "clip_ratio/region_mean": 0.0014433391697821207, "epoch": 12.139941690962099, "grad_norm": 0.14495259523391724, "learning_rate": 5e-07, "loss": -0.0147, "step": 1167 }, { "clip_ratio/high_max": 0.0018328023834328633, "clip_ratio/high_mean": 0.0006510135590360733, "clip_ratio/low_mean": 0.0005446962659334531, "clip_ratio/low_min": 2.7400263206800446e-05, "clip_ratio/region_mean": 0.0011957098358834628, "epoch": 12.14927113702624, "grad_norm": 0.12906886637210846, "learning_rate": 5e-07, "loss": -0.0043, "step": 1168 }, { "clip_ratio/high_max": 0.0017522899688628968, "clip_ratio/high_mean": 0.0006897616094647674, "clip_ratio/low_mean": 0.000547473535334575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012372351666272152, "epoch": 12.15860058309038, "grad_norm": 0.12724457681179047, "learning_rate": 5e-07, "loss": -0.026, "step": 1169 }, { "clip_ratio/high_max": 0.0018428144176141359, "clip_ratio/high_mean": 0.0007937670998217072, "clip_ratio/low_mean": 0.000605269180596224, "clip_ratio/low_min": 4.31804328400176e-05, "clip_ratio/region_mean": 0.0013990362604090478, "epoch": 12.167930029154519, "grad_norm": 0.1285616159439087, "learning_rate": 5e-07, "loss": -0.0409, "step": 1170 }, { "clip_ratio/high_max": 0.0020090812104172073, "clip_ratio/high_mean": 0.0007344109271798516, "clip_ratio/low_mean": 0.0004803061901839101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012147170873504365, "epoch": 12.177259475218658, "grad_norm": 0.1286328136920929, "learning_rate": 5e-07, "loss": -0.0269, "step": 1171 }, { "clip_ratio/high_max": 0.0018774258096527774, "clip_ratio/high_mean": 0.0007016916542852414, "clip_ratio/low_mean": 0.0006829752546764212, "clip_ratio/low_min": 2.983148897328647e-05, "clip_ratio/region_mean": 0.0013846669062331785, "epoch": 12.186588921282798, "grad_norm": 0.14457669854164124, "learning_rate": 5e-07, "loss": 0.0292, "step": 1172 }, { "clip_ratio/high_max": 0.0018556747381808236, "clip_ratio/high_mean": 0.0007508114522352116, "clip_ratio/low_mean": 0.000548907259144471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012997187186556403, "epoch": 12.19591836734694, "grad_norm": 0.13338439166545868, "learning_rate": 5e-07, "loss": -0.0363, "step": 1173 }, { "clip_ratio/high_max": 0.00172260265389923, "clip_ratio/high_mean": 0.0006746270883013494, "clip_ratio/low_mean": 0.0006876089519209927, "clip_ratio/low_min": 5.749713636760134e-05, "clip_ratio/region_mean": 0.0013622360274894163, "epoch": 12.205247813411079, "grad_norm": 0.15500113368034363, "learning_rate": 5e-07, "loss": 0.0021, "step": 1174 }, { "clip_ratio/high_max": 0.0020037119793414604, "clip_ratio/high_mean": 0.0009144360174104804, "clip_ratio/low_mean": 0.0005152131579961861, "clip_ratio/low_min": 2.1611342162941583e-05, "clip_ratio/region_mean": 0.00142964916449273, "epoch": 12.214577259475218, "grad_norm": 0.14496110379695892, "learning_rate": 5e-07, "loss": -0.0543, "step": 1175 }, { "clip_ratio/high_max": 0.002005873939197045, "clip_ratio/high_mean": 0.0007430163732351502, "clip_ratio/low_mean": 0.0006896205736666161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001432636971003376, "epoch": 12.223906705539358, "grad_norm": 0.1454256922006607, "learning_rate": 5e-07, "loss": 0.0101, "step": 1176 }, { "clip_ratio/high_max": 0.0019051679882977623, "clip_ratio/high_mean": 0.0007859625311539276, "clip_ratio/low_mean": 0.0007416747284878511, "clip_ratio/low_min": 4.121642996324226e-05, "clip_ratio/region_mean": 0.001527637243270874, "epoch": 12.2332361516035, "grad_norm": 0.1370042860507965, "learning_rate": 5e-07, "loss": 0.0242, "step": 1177 }, { "clip_ratio/high_max": 0.0016436641963082366, "clip_ratio/high_mean": 0.0006608704807149479, "clip_ratio/low_mean": 0.0005892171939194668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012500876509875525, "epoch": 12.242565597667639, "grad_norm": 0.11870937049388885, "learning_rate": 5e-07, "loss": -0.0153, "step": 1178 }, { "clip_ratio/high_max": 0.0017597228215890937, "clip_ratio/high_mean": 0.0006773076765966835, "clip_ratio/low_mean": 0.0005539462572414777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012312539292906877, "epoch": 12.251895043731778, "grad_norm": 0.13499775528907776, "learning_rate": 5e-07, "loss": -0.0033, "step": 1179 }, { "clip_ratio/high_max": 0.002067062770947814, "clip_ratio/high_mean": 0.0008120678085106192, "clip_ratio/low_mean": 0.0007396445253107231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001551712332002353, "epoch": 12.261224489795918, "grad_norm": 0.14186877012252808, "learning_rate": 5e-07, "loss": 0.0101, "step": 1180 }, { "clip_ratio/high_max": 0.002150959237042116, "clip_ratio/high_mean": 0.0008069371961028082, "clip_ratio/low_mean": 0.0005398943631007569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013468315773934592, "epoch": 12.270553935860057, "grad_norm": 0.1325785219669342, "learning_rate": 5e-07, "loss": -0.0399, "step": 1181 }, { "clip_ratio/high_max": 0.0020361900242278352, "clip_ratio/high_mean": 0.0008260139056801563, "clip_ratio/low_mean": 0.0006876024053781293, "clip_ratio/low_min": 3.0238181352615356e-05, "clip_ratio/region_mean": 0.0015136162801354658, "epoch": 12.279883381924199, "grad_norm": 0.15145257115364075, "learning_rate": 5e-07, "loss": -0.0186, "step": 1182 }, { "clip_ratio/high_max": 0.001681827059655916, "clip_ratio/high_mean": 0.0007194038771558553, "clip_ratio/low_mean": 0.0007056933864078019, "clip_ratio/low_min": 3.852801637549419e-05, "clip_ratio/region_mean": 0.0014250973108573817, "epoch": 12.289212827988338, "grad_norm": 0.14953164756298065, "learning_rate": 5e-07, "loss": 0.0123, "step": 1183 }, { "clip_ratio/high_max": 0.0021230086786090396, "clip_ratio/high_mean": 0.0007781618987792172, "clip_ratio/low_mean": 0.0006731649064022349, "clip_ratio/low_min": 1.2555243301903829e-05, "clip_ratio/region_mean": 0.0014513268070004415, "epoch": 12.298542274052478, "grad_norm": 0.12617169320583344, "learning_rate": 5e-07, "loss": 0.0116, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435965401785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 730.5911254882812, "completions/mean_terminated_length": 577.1828002929688, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 12.307871720116617, "grad_norm": 0.14020602405071259, "learning_rate": 5e-07, "loss": -0.0444, "num_tokens": 712465296.0, "reward": 0.6376604437828064, "reward_std": 0.15903644263744354, "rewards/simpleverify_reward/mean": 0.6376604437828064, "rewards/simpleverify_reward/std": 0.4806846082210541, "step": 1185 }, { "clip_ratio/high_max": 0.001612726948224008, "clip_ratio/high_mean": 0.0005898498820897657, "clip_ratio/low_mean": 0.0004958347053616308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010856845656235237, "epoch": 12.317201166180759, "grad_norm": 0.20383945107460022, "learning_rate": 5e-07, "loss": 0.0239, "step": 1186 }, { "clip_ratio/high_max": 0.0016309855818690266, "clip_ratio/high_mean": 0.0006667991638096282, "clip_ratio/low_mean": 0.000430437669820094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010972368472721428, "epoch": 12.326530612244898, "grad_norm": 0.1394258737564087, "learning_rate": 5e-07, "loss": -0.0429, "step": 1187 }, { "clip_ratio/high_max": 0.0019384173428989016, "clip_ratio/high_mean": 0.0007629004885529866, "clip_ratio/low_mean": 0.0004728114690806251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012357119667285588, "epoch": 12.335860058309038, "grad_norm": 0.13202756643295288, "learning_rate": 5e-07, "loss": -0.035, "step": 1188 }, { "clip_ratio/high_max": 0.0018892688167397864, "clip_ratio/high_mean": 0.0007091062780091306, "clip_ratio/low_mean": 0.0004157834209763678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011248896662436891, "epoch": 12.345189504373177, "grad_norm": 0.14793869853019714, "learning_rate": 5e-07, "loss": -0.0461, "step": 1189 }, { "clip_ratio/high_max": 0.001985453098313883, "clip_ratio/high_mean": 0.0007944057142594829, "clip_ratio/low_mean": 0.0005267722813186992, "clip_ratio/low_min": 1.3249947187432554e-05, "clip_ratio/region_mean": 0.0013211779987614136, "epoch": 12.354518950437317, "grad_norm": 0.14497801661491394, "learning_rate": 5e-07, "loss": 0.0039, "step": 1190 }, { "clip_ratio/high_max": 0.0017279465500905644, "clip_ratio/high_mean": 0.0006595836912310915, "clip_ratio/low_mean": 0.0005493057260537171, "clip_ratio/low_min": 2.4512169147783425e-05, "clip_ratio/region_mean": 0.001208889421832282, "epoch": 12.363848396501458, "grad_norm": 0.1295594573020935, "learning_rate": 5e-07, "loss": -0.0061, "step": 1191 }, { "clip_ratio/high_max": 0.0024060797950369306, "clip_ratio/high_mean": 0.0008383702461287612, "clip_ratio/low_mean": 0.0005060745183982363, "clip_ratio/low_min": 1.4870331142446958e-05, "clip_ratio/region_mean": 0.0013444447686197236, "epoch": 12.373177842565598, "grad_norm": 0.13399285078048706, "learning_rate": 5e-07, "loss": -0.0221, "step": 1192 }, { "clip_ratio/high_max": 0.0018882907097577117, "clip_ratio/high_mean": 0.0006937202469998738, "clip_ratio/low_mean": 0.0006081719166104449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013018921708862763, "epoch": 12.382507288629737, "grad_norm": 0.15104162693023682, "learning_rate": 5e-07, "loss": -0.0286, "step": 1193 }, { "clip_ratio/high_max": 0.0014889946542098187, "clip_ratio/high_mean": 0.0006331431159196654, "clip_ratio/low_mean": 0.0005120216328577953, "clip_ratio/low_min": 1.2344459719315637e-05, "clip_ratio/region_mean": 0.0011451647515059449, "epoch": 12.391836734693877, "grad_norm": 0.1460522711277008, "learning_rate": 5e-07, "loss": -0.0206, "step": 1194 }, { "clip_ratio/high_max": 0.0016302534968417604, "clip_ratio/high_mean": 0.0005489652512551402, "clip_ratio/low_mean": 0.0005468472500069765, "clip_ratio/low_min": 1.571931534272153e-05, "clip_ratio/region_mean": 0.001095812527637463, "epoch": 12.401166180758018, "grad_norm": 0.1427641659975052, "learning_rate": 5e-07, "loss": -0.0052, "step": 1195 }, { "clip_ratio/high_max": 0.0019434141613601241, "clip_ratio/high_mean": 0.0006222380161489127, "clip_ratio/low_mean": 0.0005413706176113919, "clip_ratio/low_min": 2.898233287851326e-05, "clip_ratio/region_mean": 0.0011636086273938417, "epoch": 12.410495626822158, "grad_norm": 0.11806466430425644, "learning_rate": 5e-07, "loss": 0.0019, "step": 1196 }, { "clip_ratio/high_max": 0.001909378854179522, "clip_ratio/high_mean": 0.0007485161031581811, "clip_ratio/low_mean": 0.0006192595028551295, "clip_ratio/low_min": 1.6974470781860873e-05, "clip_ratio/region_mean": 0.0013677756178367417, "epoch": 12.419825072886297, "grad_norm": 0.15992729365825653, "learning_rate": 5e-07, "loss": -0.0378, "step": 1197 }, { "clip_ratio/high_max": 0.0018988812735187821, "clip_ratio/high_mean": 0.0007542659786849981, "clip_ratio/low_mean": 0.0007658992981305346, "clip_ratio/low_min": 8.043410798563855e-05, "clip_ratio/region_mean": 0.0015201653077383526, "epoch": 12.429154518950437, "grad_norm": 0.14908994734287262, "learning_rate": 5e-07, "loss": 0.0335, "step": 1198 }, { "clip_ratio/high_max": 0.0019410889872233383, "clip_ratio/high_mean": 0.0007387901796391816, "clip_ratio/low_mean": 0.000557912568183383, "clip_ratio/low_min": 3.917207141057588e-05, "clip_ratio/region_mean": 0.0012967027541890275, "epoch": 12.438483965014576, "grad_norm": 0.12969765067100525, "learning_rate": 5e-07, "loss": -0.0037, "step": 1199 }, { "clip_ratio/high_max": 0.001997358223889023, "clip_ratio/high_mean": 0.000762281062634429, "clip_ratio/low_mean": 0.0006455403417930938, "clip_ratio/low_min": 1.200999213324394e-05, "clip_ratio/region_mean": 0.0014078214117034804, "epoch": 12.447813411078718, "grad_norm": 0.15710321068763733, "learning_rate": 5e-07, "loss": -0.0155, "step": 1200 }, { "clip_ratio/high_max": 0.0016018409442040138, "clip_ratio/high_mean": 0.0006414445560949389, "clip_ratio/low_mean": 0.000635865211734199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001277309795113979, "epoch": 12.457142857142857, "grad_norm": 0.13913245499134064, "learning_rate": 5e-07, "loss": -0.0359, "step": 1201 }, { "clip_ratio/high_max": 0.0019252365600550547, "clip_ratio/high_mean": 0.0007330635817197617, "clip_ratio/low_mean": 0.0006391084161805338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001372171984257875, "epoch": 12.466472303206997, "grad_norm": 0.12452198565006256, "learning_rate": 5e-07, "loss": -0.0299, "step": 1202 }, { "clip_ratio/high_max": 0.0018770837086776737, "clip_ratio/high_mean": 0.0007021723522484535, "clip_ratio/low_mean": 0.000593198253227456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012953706427651923, "epoch": 12.475801749271136, "grad_norm": 0.1293376386165619, "learning_rate": 5e-07, "loss": -0.0197, "step": 1203 }, { "clip_ratio/high_max": 0.001901040206575999, "clip_ratio/high_mean": 0.0007517021404055413, "clip_ratio/low_mean": 0.0007161447683756705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001467846923333127, "epoch": 12.485131195335278, "grad_norm": 0.3254278302192688, "learning_rate": 5e-07, "loss": -0.0022, "step": 1204 }, { "clip_ratio/high_max": 0.0016549959836993366, "clip_ratio/high_mean": 0.0006424762786991778, "clip_ratio/low_mean": 0.0007682196683163056, "clip_ratio/low_min": 9.941148164216429e-06, "clip_ratio/region_mean": 0.0014106959897617344, "epoch": 12.494460641399417, "grad_norm": 0.13477812707424164, "learning_rate": 5e-07, "loss": 0.004, "step": 1205 }, { "clip_ratio/high_max": 0.001636433462408604, "clip_ratio/high_mean": 0.0005767528045907966, "clip_ratio/low_mean": 0.0005874500102436286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011642028403002769, "epoch": 12.503790087463557, "grad_norm": 0.1431804895401001, "learning_rate": 5e-07, "loss": 0.007, "step": 1206 }, { "clip_ratio/high_max": 0.0016959572967607528, "clip_ratio/high_mean": 0.0006948689069758984, "clip_ratio/low_mean": 0.000600192885940487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012950618111062795, "epoch": 12.513119533527696, "grad_norm": 0.3745322823524475, "learning_rate": 5e-07, "loss": -0.0085, "step": 1207 }, { "clip_ratio/high_max": 0.002133893867721781, "clip_ratio/high_mean": 0.0008345527185156243, "clip_ratio/low_mean": 0.0007091588522598613, "clip_ratio/low_min": 2.6792034987010993e-05, "clip_ratio/region_mean": 0.0015437115798704326, "epoch": 12.522448979591836, "grad_norm": 0.15592855215072632, "learning_rate": 5e-07, "loss": 0.0187, "step": 1208 }, { "clip_ratio/high_max": 0.0020466588939598296, "clip_ratio/high_mean": 0.0007552238494099583, "clip_ratio/low_mean": 0.0006422079613912501, "clip_ratio/low_min": 2.912989839387592e-05, "clip_ratio/region_mean": 0.0013974317844258621, "epoch": 12.531778425655977, "grad_norm": 0.14353114366531372, "learning_rate": 5e-07, "loss": -0.0279, "step": 1209 }, { "clip_ratio/high_max": 0.0021939765210845508, "clip_ratio/high_mean": 0.0007016992240096442, "clip_ratio/low_mean": 0.0007564282732346328, "clip_ratio/low_min": 1.4747522072866559e-05, "clip_ratio/region_mean": 0.0014581274735974148, "epoch": 12.541107871720117, "grad_norm": 0.1386343240737915, "learning_rate": 5e-07, "loss": -0.0005, "step": 1210 }, { "clip_ratio/high_max": 0.0021082968305563554, "clip_ratio/high_mean": 0.0008564597446820699, "clip_ratio/low_mean": 0.0006666231565759517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015230829085339792, "epoch": 12.550437317784256, "grad_norm": 0.15119795501232147, "learning_rate": 5e-07, "loss": -0.0483, "step": 1211 }, { "clip_ratio/high_max": 0.001996045932173729, "clip_ratio/high_mean": 0.0008914689169614576, "clip_ratio/low_mean": 0.0006096880219956802, "clip_ratio/low_min": 1.2307995348237455e-05, "clip_ratio/region_mean": 0.0015011569848866202, "epoch": 12.559766763848396, "grad_norm": 0.13173779845237732, "learning_rate": 5e-07, "loss": -0.0626, "step": 1212 }, { "clip_ratio/high_max": 0.001916795372380875, "clip_ratio/high_mean": 0.0007732098765700357, "clip_ratio/low_mean": 0.0006096367869758978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001382846607157262, "epoch": 12.569096209912537, "grad_norm": 0.13776831328868866, "learning_rate": 5e-07, "loss": -0.0305, "step": 1213 }, { "clip_ratio/high_max": 0.0018542122234066483, "clip_ratio/high_mean": 0.0007232752723211888, "clip_ratio/low_mean": 0.000725083666111459, "clip_ratio/low_min": 1.4114724763203412e-05, "clip_ratio/region_mean": 0.00144835896207951, "epoch": 12.578425655976677, "grad_norm": 0.1258634626865387, "learning_rate": 5e-07, "loss": -0.0135, "step": 1214 }, { "clip_ratio/high_max": 0.0014146079374768306, "clip_ratio/high_mean": 0.0005462741919473046, "clip_ratio/low_mean": 0.0008887761523510562, "clip_ratio/low_min": 0.00012552521275210893, "clip_ratio/region_mean": 0.0014350503479363397, "epoch": 12.587755102040816, "grad_norm": 0.14401257038116455, "learning_rate": 5e-07, "loss": 0.0367, "step": 1215 }, { "clip_ratio/high_max": 0.0016173644216905814, "clip_ratio/high_mean": 0.0006171532922962797, "clip_ratio/low_mean": 0.0008262715073215077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014434248259931337, "epoch": 12.597084548104956, "grad_norm": 0.12639743089675903, "learning_rate": 5e-07, "loss": 0.024, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.048304966517857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 741.6127319335938, "completions/mean_terminated_length": 571.3549194335938, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 12.606413994169095, "grad_norm": 0.15305782854557037, "learning_rate": 5e-07, "loss": -0.001, "num_tokens": 730971481.0, "reward": 0.6278250813484192, "reward_std": 0.16740186512470245, "rewards/simpleverify_reward/mean": 0.6278250813484192, "rewards/simpleverify_reward/std": 0.4833931624889374, "step": 1217 }, { "clip_ratio/high_max": 0.0016808219625090715, "clip_ratio/high_mean": 0.0008156962503562681, "clip_ratio/low_mean": 0.0005266361076792236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013423323653114494, "epoch": 12.615743440233237, "grad_norm": 0.16377736628055573, "learning_rate": 5e-07, "loss": -0.0314, "step": 1218 }, { "clip_ratio/high_max": 0.0020383567116368795, "clip_ratio/high_mean": 0.0008181196735677077, "clip_ratio/low_mean": 0.0005008976831959444, "clip_ratio/low_min": 1.461304691474652e-05, "clip_ratio/region_mean": 0.0013190173667680938, "epoch": 12.625072886297376, "grad_norm": 0.16106781363487244, "learning_rate": 5e-07, "loss": -0.0052, "step": 1219 }, { "clip_ratio/high_max": 0.0015403232428070623, "clip_ratio/high_mean": 0.0006793167158321012, "clip_ratio/low_mean": 0.000527423937455751, "clip_ratio/low_min": 3.306003782199696e-05, "clip_ratio/region_mean": 0.0012067406241840217, "epoch": 12.634402332361516, "grad_norm": 0.20902949571609497, "learning_rate": 5e-07, "loss": 0.0071, "step": 1220 }, { "clip_ratio/high_max": 0.0018049106656690128, "clip_ratio/high_mean": 0.000670666679070564, "clip_ratio/low_mean": 0.0004912147514914977, "clip_ratio/low_min": 1.26237127915374e-05, "clip_ratio/region_mean": 0.00116188142055762, "epoch": 12.643731778425655, "grad_norm": 0.14749254286289215, "learning_rate": 5e-07, "loss": -0.0202, "step": 1221 }, { "clip_ratio/high_max": 0.0017623930434638169, "clip_ratio/high_mean": 0.0007467694567822036, "clip_ratio/low_mean": 0.0006155947903607739, "clip_ratio/low_min": 4.3543980609683786e-05, "clip_ratio/region_mean": 0.0013623642807942815, "epoch": 12.653061224489797, "grad_norm": 0.15726526081562042, "learning_rate": 5e-07, "loss": -0.0304, "step": 1222 }, { "clip_ratio/high_max": 0.0019114713395538274, "clip_ratio/high_mean": 0.0007807267202224466, "clip_ratio/low_mean": 0.0005356726314857951, "clip_ratio/low_min": 1.302083364862483e-05, "clip_ratio/region_mean": 0.0013163993498892523, "epoch": 12.662390670553936, "grad_norm": 0.16654308140277863, "learning_rate": 5e-07, "loss": 0.0146, "step": 1223 }, { "clip_ratio/high_max": 0.0018132389668608084, "clip_ratio/high_mean": 0.0006913281322340481, "clip_ratio/low_mean": 0.0006218671915121377, "clip_ratio/low_min": 9.673424756329041e-06, "clip_ratio/region_mean": 0.001313195320108207, "epoch": 12.671720116618076, "grad_norm": 0.1480877548456192, "learning_rate": 5e-07, "loss": 0.0143, "step": 1224 }, { "clip_ratio/high_max": 0.001709894753730623, "clip_ratio/high_mean": 0.0006737099411111558, "clip_ratio/low_mean": 0.0005590247510554036, "clip_ratio/low_min": 2.498175308573991e-05, "clip_ratio/region_mean": 0.0012327347030804958, "epoch": 12.681049562682215, "grad_norm": 0.1332094818353653, "learning_rate": 5e-07, "loss": -0.0005, "step": 1225 }, { "clip_ratio/high_max": 0.0018651442660484463, "clip_ratio/high_mean": 0.0006563218503288226, "clip_ratio/low_mean": 0.0006095924193232349, "clip_ratio/low_min": 1.2065636838087812e-05, "clip_ratio/region_mean": 0.0012659142594202422, "epoch": 12.690379008746355, "grad_norm": 0.14166386425495148, "learning_rate": 5e-07, "loss": 0.0238, "step": 1226 }, { "clip_ratio/high_max": 0.0017769347250577994, "clip_ratio/high_mean": 0.0006511068668260123, "clip_ratio/low_mean": 0.0005500296783793601, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012011365506623406, "epoch": 12.699708454810496, "grad_norm": 0.15046022832393646, "learning_rate": 5e-07, "loss": -0.0223, "step": 1227 }, { "clip_ratio/high_max": 0.0020599360832420643, "clip_ratio/high_mean": 0.0008062592823989689, "clip_ratio/low_mean": 0.0005135181818332057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001319777475146111, "epoch": 12.709037900874636, "grad_norm": 0.1308765560388565, "learning_rate": 5e-07, "loss": -0.0441, "step": 1228 }, { "clip_ratio/high_max": 0.0018919665526482277, "clip_ratio/high_mean": 0.0007905379352450836, "clip_ratio/low_mean": 0.0006701776783302194, "clip_ratio/low_min": 1.635269472899381e-05, "clip_ratio/region_mean": 0.0014607155972043984, "epoch": 12.718367346938775, "grad_norm": 0.1822924017906189, "learning_rate": 5e-07, "loss": 0.0052, "step": 1229 }, { "clip_ratio/high_max": 0.002446707061608322, "clip_ratio/high_mean": 0.0009155731349892449, "clip_ratio/low_mean": 0.0005479303008542047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014635034203820396, "epoch": 12.727696793002915, "grad_norm": 0.15327437222003937, "learning_rate": 5e-07, "loss": -0.0584, "step": 1230 }, { "clip_ratio/high_max": 0.0019064384177909233, "clip_ratio/high_mean": 0.000839969119624584, "clip_ratio/low_mean": 0.0006674830583506264, "clip_ratio/low_min": 2.5536261091474444e-05, "clip_ratio/region_mean": 0.0015074521725182422, "epoch": 12.737026239067056, "grad_norm": 0.14626742899417877, "learning_rate": 5e-07, "loss": -0.0205, "step": 1231 }, { "clip_ratio/high_max": 0.0016114579957502428, "clip_ratio/high_mean": 0.000750168876038515, "clip_ratio/low_mean": 0.0005429009052022593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012930697776027955, "epoch": 12.746355685131196, "grad_norm": 0.15322080254554749, "learning_rate": 5e-07, "loss": -0.0383, "step": 1232 }, { "clip_ratio/high_max": 0.0019417433177295607, "clip_ratio/high_mean": 0.0007440247027261648, "clip_ratio/low_mean": 0.0007276434516825248, "clip_ratio/low_min": 1.4966474736866076e-05, "clip_ratio/region_mean": 0.00147166818351252, "epoch": 12.755685131195335, "grad_norm": 0.14318135380744934, "learning_rate": 5e-07, "loss": -0.0213, "step": 1233 }, { "clip_ratio/high_max": 0.00213820632052375, "clip_ratio/high_mean": 0.0008485429352731444, "clip_ratio/low_mean": 0.0006979435438552173, "clip_ratio/low_min": 4.1013918234966695e-05, "clip_ratio/region_mean": 0.0015464864845853299, "epoch": 12.765014577259475, "grad_norm": 0.14396220445632935, "learning_rate": 5e-07, "loss": -0.0171, "step": 1234 }, { "clip_ratio/high_max": 0.0019432612352829892, "clip_ratio/high_mean": 0.000747754353142227, "clip_ratio/low_mean": 0.0005254296884231735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012731840506603476, "epoch": 12.774344023323614, "grad_norm": 0.14560867846012115, "learning_rate": 5e-07, "loss": -0.0303, "step": 1235 }, { "clip_ratio/high_max": 0.0022671961633022875, "clip_ratio/high_mean": 0.0009398784313816577, "clip_ratio/low_mean": 0.0006274052648223005, "clip_ratio/low_min": 4.767724749399349e-05, "clip_ratio/region_mean": 0.0015672837143938523, "epoch": 12.783673469387756, "grad_norm": 0.15020087361335754, "learning_rate": 5e-07, "loss": -0.042, "step": 1236 }, { "clip_ratio/high_max": 0.002059183447272517, "clip_ratio/high_mean": 0.0008218164821300888, "clip_ratio/low_mean": 0.0007280958634510171, "clip_ratio/low_min": 1.5016818906588014e-05, "clip_ratio/region_mean": 0.0015499123182962649, "epoch": 12.793002915451895, "grad_norm": 0.15381057560443878, "learning_rate": 5e-07, "loss": 0.0304, "step": 1237 }, { "clip_ratio/high_max": 0.0021367518202168867, "clip_ratio/high_mean": 0.0007582880061818287, "clip_ratio/low_mean": 0.0005774373576059588, "clip_ratio/low_min": 3.388827826711349e-05, "clip_ratio/region_mean": 0.0013357253701542504, "epoch": 12.802332361516035, "grad_norm": 0.1390434354543686, "learning_rate": 5e-07, "loss": -0.021, "step": 1238 }, { "clip_ratio/high_max": 0.002149423620721791, "clip_ratio/high_mean": 0.000832321185953333, "clip_ratio/low_mean": 0.0006405005851775059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014728217520314502, "epoch": 12.811661807580174, "grad_norm": 0.17085188627243042, "learning_rate": 5e-07, "loss": -0.0143, "step": 1239 }, { "clip_ratio/high_max": 0.0021806505901622586, "clip_ratio/high_mean": 0.0008614446396677522, "clip_ratio/low_mean": 0.0006045501904736739, "clip_ratio/low_min": 3.032351833098801e-05, "clip_ratio/region_mean": 0.0014659947737527546, "epoch": 12.820991253644316, "grad_norm": 0.55730801820755, "learning_rate": 5e-07, "loss": -0.0308, "step": 1240 }, { "clip_ratio/high_max": 0.0019504256488289684, "clip_ratio/high_mean": 0.0008649495084682712, "clip_ratio/low_mean": 0.0007011663528828649, "clip_ratio/low_min": 1.2164266081526875e-05, "clip_ratio/region_mean": 0.0015661158577131573, "epoch": 12.830320699708455, "grad_norm": 0.15614360570907593, "learning_rate": 5e-07, "loss": -0.0101, "step": 1241 }, { "clip_ratio/high_max": 0.0019412505498621613, "clip_ratio/high_mean": 0.0007698244280618383, "clip_ratio/low_mean": 0.0005943853493590723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013642097583215218, "epoch": 12.839650145772595, "grad_norm": 0.21478131413459778, "learning_rate": 5e-07, "loss": -0.0307, "step": 1242 }, { "clip_ratio/high_max": 0.0019063616709900089, "clip_ratio/high_mean": 0.0008040774064284051, "clip_ratio/low_mean": 0.0006916512647876516, "clip_ratio/low_min": 4.386990985949524e-05, "clip_ratio/region_mean": 0.0014957286912249401, "epoch": 12.848979591836734, "grad_norm": 0.13337506353855133, "learning_rate": 5e-07, "loss": -0.0113, "step": 1243 }, { "clip_ratio/high_max": 0.0021155461290618405, "clip_ratio/high_mean": 0.0009025161762110656, "clip_ratio/low_mean": 0.000821361690213962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00172387787461048, "epoch": 12.858309037900874, "grad_norm": 0.1569489985704422, "learning_rate": 5e-07, "loss": -0.0238, "step": 1244 }, { "clip_ratio/high_max": 0.0024346049285668414, "clip_ratio/high_mean": 0.0008753813199291471, "clip_ratio/low_mean": 0.0006581455290870508, "clip_ratio/low_min": 2.4781918909866363e-05, "clip_ratio/region_mean": 0.0015335267889895476, "epoch": 12.867638483965015, "grad_norm": 0.14416837692260742, "learning_rate": 5e-07, "loss": -0.0277, "step": 1245 }, { "clip_ratio/high_max": 0.0017858942774182651, "clip_ratio/high_mean": 0.000715032405423699, "clip_ratio/low_mean": 0.0006326013572106604, "clip_ratio/low_min": 1.27135881484719e-05, "clip_ratio/region_mean": 0.001347633766272338, "epoch": 12.876967930029155, "grad_norm": 0.13001838326454163, "learning_rate": 5e-07, "loss": -0.0195, "step": 1246 }, { "clip_ratio/high_max": 0.0018101318091794383, "clip_ratio/high_mean": 0.0006981241476751165, "clip_ratio/low_mean": 0.0006472088507507578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001345332981145475, "epoch": 12.886297376093294, "grad_norm": 0.15189793705940247, "learning_rate": 5e-07, "loss": -0.0461, "step": 1247 }, { "clip_ratio/high_max": 0.002024817280471325, "clip_ratio/high_mean": 0.0008338721090694889, "clip_ratio/low_mean": 0.0006104419417169993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014443140644289088, "epoch": 12.895626822157434, "grad_norm": 0.14191243052482605, "learning_rate": 5e-07, "loss": -0.0194, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0462472098214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 732.2736206054688, "completions/mean_terminated_length": 569.16748046875, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 13.00932944606414, "grad_norm": 0.14877565205097198, "learning_rate": 5e-07, "loss": 0.0005, "num_tokens": 749425798.0, "reward": 0.643380343914032, "reward_std": 0.16117031872272491, "rewards/simpleverify_reward/mean": 0.6433802843093872, "rewards/simpleverify_reward/std": 0.4790095388889313, "step": 1249 }, { "clip_ratio/high_max": 0.0014300194197858218, "clip_ratio/high_mean": 0.0005935284170845989, "clip_ratio/low_mean": 0.00046236264461185783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010558910435065627, "epoch": 13.018658892128279, "grad_norm": 0.1527089774608612, "learning_rate": 5e-07, "loss": 0.0388, "step": 1250 }, { "clip_ratio/high_max": 0.0017938402670552023, "clip_ratio/high_mean": 0.0007585854009448667, "clip_ratio/low_mean": 0.0004365438419426937, "clip_ratio/low_min": 2.3306202820094768e-05, "clip_ratio/region_mean": 0.0011951292435696814, "epoch": 13.02798833819242, "grad_norm": 0.1344321072101593, "learning_rate": 5e-07, "loss": -0.0679, "step": 1251 }, { "clip_ratio/high_max": 0.0014658649033663096, "clip_ratio/high_mean": 0.0005879477071175643, "clip_ratio/low_mean": 0.0004777915246449993, "clip_ratio/low_min": 1.173488544736756e-05, "clip_ratio/region_mean": 0.0010657392249413533, "epoch": 13.03731778425656, "grad_norm": 0.14636744558811188, "learning_rate": 5e-07, "loss": 0.0111, "step": 1252 }, { "clip_ratio/high_max": 0.001521335947472835, "clip_ratio/high_mean": 0.0006821431434218539, "clip_ratio/low_mean": 0.0004732816100840864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011554247357707936, "epoch": 13.0466472303207, "grad_norm": 0.1503443568944931, "learning_rate": 5e-07, "loss": -0.0079, "step": 1253 }, { "clip_ratio/high_max": 0.002034241257206304, "clip_ratio/high_mean": 0.0007973270894581219, "clip_ratio/low_mean": 0.0004467267776817607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012440538594091777, "epoch": 13.055976676384839, "grad_norm": 0.12586656212806702, "learning_rate": 5e-07, "loss": -0.0446, "step": 1254 }, { "clip_ratio/high_max": 0.0015135385619942099, "clip_ratio/high_mean": 0.0006540384092659224, "clip_ratio/low_mean": 0.00045774133195664035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011117797639599303, "epoch": 13.06530612244898, "grad_norm": 0.14609979093074799, "learning_rate": 5e-07, "loss": -0.0118, "step": 1255 }, { "clip_ratio/high_max": 0.0014492101872747298, "clip_ratio/high_mean": 0.0006134814793767873, "clip_ratio/low_mean": 0.0005469672687468119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011604486935539171, "epoch": 13.07463556851312, "grad_norm": 0.15251614153385162, "learning_rate": 5e-07, "loss": -0.0026, "step": 1256 }, { "clip_ratio/high_max": 0.0019318859485792927, "clip_ratio/high_mean": 0.0007884438582550501, "clip_ratio/low_mean": 0.0005745072758145398, "clip_ratio/low_min": 6.460200165747665e-05, "clip_ratio/region_mean": 0.0013629511413455475, "epoch": 13.08396501457726, "grad_norm": 0.15120062232017517, "learning_rate": 5e-07, "loss": -0.0063, "step": 1257 }, { "clip_ratio/high_max": 0.0020541868361760862, "clip_ratio/high_mean": 0.0008129296020342736, "clip_ratio/low_mean": 0.000501104513205064, "clip_ratio/low_min": 1.1232926226512063e-05, "clip_ratio/region_mean": 0.0013140341397956945, "epoch": 13.093294460641399, "grad_norm": 0.1420772522687912, "learning_rate": 5e-07, "loss": -0.0349, "step": 1258 }, { "clip_ratio/high_max": 0.002060227488982491, "clip_ratio/high_mean": 0.0007613574853166938, "clip_ratio/low_mean": 0.0005249786663625855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012863361807831097, "epoch": 13.102623906705539, "grad_norm": 0.13828247785568237, "learning_rate": 5e-07, "loss": -0.0127, "step": 1259 }, { "clip_ratio/high_max": 0.0019004418209078722, "clip_ratio/high_mean": 0.000801141133706551, "clip_ratio/low_mean": 0.0004876141010754509, "clip_ratio/low_min": 1.5311121387640014e-05, "clip_ratio/region_mean": 0.001288755203859182, "epoch": 13.11195335276968, "grad_norm": 0.15646933019161224, "learning_rate": 5e-07, "loss": -0.0142, "step": 1260 }, { "clip_ratio/high_max": 0.0017345858614135068, "clip_ratio/high_mean": 0.0007035776798147708, "clip_ratio/low_mean": 0.0005981586818961659, "clip_ratio/low_min": 5.02603634231491e-05, "clip_ratio/region_mean": 0.0013017363889957778, "epoch": 13.12128279883382, "grad_norm": 0.15112054347991943, "learning_rate": 5e-07, "loss": -0.0279, "step": 1261 }, { "clip_ratio/high_max": 0.0018466339352016803, "clip_ratio/high_mean": 0.0007214012730401009, "clip_ratio/low_mean": 0.0006138901590020396, "clip_ratio/low_min": 3.8301452150335535e-05, "clip_ratio/region_mean": 0.0013352914174902253, "epoch": 13.130612244897959, "grad_norm": 0.15530596673488617, "learning_rate": 5e-07, "loss": 0.001, "step": 1262 }, { "clip_ratio/high_max": 0.002024198147410061, "clip_ratio/high_mean": 0.0007608144805999473, "clip_ratio/low_mean": 0.0006082403024265659, "clip_ratio/low_min": 1.872659231594298e-05, "clip_ratio/region_mean": 0.001369054789392976, "epoch": 13.139941690962099, "grad_norm": 0.14771798253059387, "learning_rate": 5e-07, "loss": 0.0143, "step": 1263 }, { "clip_ratio/high_max": 0.001708582800347358, "clip_ratio/high_mean": 0.0006790621619074955, "clip_ratio/low_mean": 0.0005400582695074263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012191204150440171, "epoch": 13.14927113702624, "grad_norm": 0.14212672412395477, "learning_rate": 5e-07, "loss": -0.0183, "step": 1264 }, { "clip_ratio/high_max": 0.0019085042549704667, "clip_ratio/high_mean": 0.0007595590868731961, "clip_ratio/low_mean": 0.0005578174341280828, "clip_ratio/low_min": 1.3727212717640214e-05, "clip_ratio/region_mean": 0.0013173765291867312, "epoch": 13.15860058309038, "grad_norm": 0.14561031758785248, "learning_rate": 5e-07, "loss": 0.003, "step": 1265 }, { "clip_ratio/high_max": 0.002041215295321308, "clip_ratio/high_mean": 0.000836761169921374, "clip_ratio/low_mean": 0.0005959738555247895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001432735032722121, "epoch": 13.167930029154519, "grad_norm": 0.144510880112648, "learning_rate": 5e-07, "loss": -0.0216, "step": 1266 }, { "clip_ratio/high_max": 0.0020837078627664596, "clip_ratio/high_mean": 0.0008186297873180592, "clip_ratio/low_mean": 0.0006625195483138668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014811493310844526, "epoch": 13.177259475218658, "grad_norm": 0.14366711676120758, "learning_rate": 5e-07, "loss": -0.0364, "step": 1267 }, { "clip_ratio/high_max": 0.0021218185502220877, "clip_ratio/high_mean": 0.0007648397568118526, "clip_ratio/low_mean": 0.000559891045668337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013247307906567585, "epoch": 13.186588921282798, "grad_norm": 0.17561061680316925, "learning_rate": 5e-07, "loss": -0.0516, "step": 1268 }, { "clip_ratio/high_max": 0.0019899839244317263, "clip_ratio/high_mean": 0.0009018130385811673, "clip_ratio/low_mean": 0.0006294452105066739, "clip_ratio/low_min": 1.7236623534699902e-05, "clip_ratio/region_mean": 0.0015312582399928942, "epoch": 13.19591836734694, "grad_norm": 0.1496981680393219, "learning_rate": 5e-07, "loss": -0.0031, "step": 1269 }, { "clip_ratio/high_max": 0.0018438695769873448, "clip_ratio/high_mean": 0.0007872636742831673, "clip_ratio/low_mean": 0.0006364053533616243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014236690185498446, "epoch": 13.205247813411079, "grad_norm": 0.13235031068325043, "learning_rate": 5e-07, "loss": -0.04, "step": 1270 }, { "clip_ratio/high_max": 0.001780887079803506, "clip_ratio/high_mean": 0.0007454919759766199, "clip_ratio/low_mean": 0.0006393953699443955, "clip_ratio/low_min": 2.4995000785565935e-05, "clip_ratio/region_mean": 0.001384887371386867, "epoch": 13.214577259475218, "grad_norm": 0.1406499445438385, "learning_rate": 5e-07, "loss": -0.0072, "step": 1271 }, { "clip_ratio/high_max": 0.0017942827762453817, "clip_ratio/high_mean": 0.0007059664221742423, "clip_ratio/low_mean": 0.0006488656727015041, "clip_ratio/low_min": 3.709312022692757e-05, "clip_ratio/region_mean": 0.0013548321003327146, "epoch": 13.223906705539358, "grad_norm": 0.13045406341552734, "learning_rate": 5e-07, "loss": -0.0022, "step": 1272 }, { "clip_ratio/high_max": 0.0018388376593065914, "clip_ratio/high_mean": 0.0007814768732714583, "clip_ratio/low_mean": 0.0007318369680433534, "clip_ratio/low_min": 7.757645289530046e-05, "clip_ratio/region_mean": 0.0015133138331293594, "epoch": 13.2332361516035, "grad_norm": 0.1561194658279419, "learning_rate": 5e-07, "loss": -0.0368, "step": 1273 }, { "clip_ratio/high_max": 0.0019808301876764745, "clip_ratio/high_mean": 0.0008299286419060081, "clip_ratio/low_mean": 0.0005200862942729145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013500149398169015, "epoch": 13.242565597667639, "grad_norm": 0.12543587386608124, "learning_rate": 5e-07, "loss": -0.0434, "step": 1274 }, { "clip_ratio/high_max": 0.0016930728197621647, "clip_ratio/high_mean": 0.0007434735598508269, "clip_ratio/low_mean": 0.0007004467379374546, "clip_ratio/low_min": 3.327122612972744e-05, "clip_ratio/region_mean": 0.0014439202950597974, "epoch": 13.251895043731778, "grad_norm": 0.14247015118598938, "learning_rate": 5e-07, "loss": -0.0058, "step": 1275 }, { "clip_ratio/high_max": 0.0018708229945332278, "clip_ratio/high_mean": 0.0007387474361166824, "clip_ratio/low_mean": 0.0006364946439134656, "clip_ratio/low_min": 1.2812628483516164e-05, "clip_ratio/region_mean": 0.0013752420345554128, "epoch": 13.261224489795918, "grad_norm": 0.12925434112548828, "learning_rate": 5e-07, "loss": -0.0325, "step": 1276 }, { "clip_ratio/high_max": 0.001990424199902918, "clip_ratio/high_mean": 0.0007292095433513168, "clip_ratio/low_mean": 0.0006970414888201049, "clip_ratio/low_min": 1.628028076083865e-05, "clip_ratio/region_mean": 0.0014262510303524323, "epoch": 13.270553935860057, "grad_norm": 0.13361014425754547, "learning_rate": 5e-07, "loss": 0.018, "step": 1277 }, { "clip_ratio/high_max": 0.002035448716924293, "clip_ratio/high_mean": 0.0008302566429847502, "clip_ratio/low_mean": 0.0005874916569155175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014177482880768366, "epoch": 13.279883381924199, "grad_norm": 0.1449812650680542, "learning_rate": 5e-07, "loss": -0.0313, "step": 1278 }, { "clip_ratio/high_max": 0.0017146780337498058, "clip_ratio/high_mean": 0.0006951612231205218, "clip_ratio/low_mean": 0.0006504982961814676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013456595406751148, "epoch": 13.289212827988338, "grad_norm": 0.1436925083398819, "learning_rate": 5e-07, "loss": 0.0135, "step": 1279 }, { "clip_ratio/high_max": 0.0022200812163646333, "clip_ratio/high_mean": 0.0008717521086509805, "clip_ratio/low_mean": 0.0006465282363024016, "clip_ratio/low_min": 1.4311885024653748e-05, "clip_ratio/region_mean": 0.0015182803617790341, "epoch": 13.298542274052478, "grad_norm": 0.15237142145633698, "learning_rate": 5e-07, "loss": 0.0004, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 708.3484497070312, "completions/mean_terminated_length": 562.4309692382812, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 13.307871720116617, "grad_norm": 0.14099115133285522, "learning_rate": 5e-07, "loss": 0.0032, "num_tokens": 767744180.0, "reward": 0.6517857313156128, "reward_std": 0.15259025990962982, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47641268372535706, "step": 1281 }, { "clip_ratio/high_max": 0.001878128394309897, "clip_ratio/high_mean": 0.0007892322300904198, "clip_ratio/low_mean": 0.00032552577158639906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011147579898533877, "epoch": 13.317201166180759, "grad_norm": 0.13552561402320862, "learning_rate": 5e-07, "loss": -0.0724, "step": 1282 }, { "clip_ratio/high_max": 0.0018581584918138105, "clip_ratio/high_mean": 0.0007067797678246279, "clip_ratio/low_mean": 0.00036592053538697655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010727003100328147, "epoch": 13.326530612244898, "grad_norm": 0.14826519787311554, "learning_rate": 5e-07, "loss": -0.0047, "step": 1283 }, { "clip_ratio/high_max": 0.0019106469553662464, "clip_ratio/high_mean": 0.0007465452144970186, "clip_ratio/low_mean": 0.0003963851038406574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011429303121985868, "epoch": 13.335860058309038, "grad_norm": 0.12816141545772552, "learning_rate": 5e-07, "loss": -0.025, "step": 1284 }, { "clip_ratio/high_max": 0.0018788583874993492, "clip_ratio/high_mean": 0.0007712961105426075, "clip_ratio/low_mean": 0.0004284092592570232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011997053734376095, "epoch": 13.345189504373177, "grad_norm": 0.14177173376083374, "learning_rate": 5e-07, "loss": -0.0338, "step": 1285 }, { "clip_ratio/high_max": 0.0015485124386032112, "clip_ratio/high_mean": 0.0005759485929957009, "clip_ratio/low_mean": 0.00044590033030544873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010218489514954854, "epoch": 13.354518950437317, "grad_norm": 0.16176974773406982, "learning_rate": 5e-07, "loss": -0.0133, "step": 1286 }, { "clip_ratio/high_max": 0.001833028014516458, "clip_ratio/high_mean": 0.0006313532576314174, "clip_ratio/low_mean": 0.0005210562994761858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011524095862114336, "epoch": 13.363848396501458, "grad_norm": 0.21289771795272827, "learning_rate": 5e-07, "loss": -0.0042, "step": 1287 }, { "clip_ratio/high_max": 0.001837701340264175, "clip_ratio/high_mean": 0.0007069672519719461, "clip_ratio/low_mean": 0.0005299616541378782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012369288815534674, "epoch": 13.373177842565598, "grad_norm": 0.14194338023662567, "learning_rate": 5e-07, "loss": 0.0003, "step": 1288 }, { "clip_ratio/high_max": 0.001976145795197226, "clip_ratio/high_mean": 0.0007101684059307445, "clip_ratio/low_mean": 0.00047555685432598693, "clip_ratio/low_min": 2.1309238945832476e-05, "clip_ratio/region_mean": 0.0011857252320623957, "epoch": 13.382507288629737, "grad_norm": 0.15848934650421143, "learning_rate": 5e-07, "loss": -0.0134, "step": 1289 }, { "clip_ratio/high_max": 0.002012095596001018, "clip_ratio/high_mean": 0.0008559373891330324, "clip_ratio/low_mean": 0.00041136046365863876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012672978427872295, "epoch": 13.391836734693877, "grad_norm": 0.13333940505981445, "learning_rate": 5e-07, "loss": -0.0611, "step": 1290 }, { "clip_ratio/high_max": 0.001835995921283029, "clip_ratio/high_mean": 0.0007316687460843241, "clip_ratio/low_mean": 0.00044623911617236445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011779078377003316, "epoch": 13.401166180758018, "grad_norm": 0.1262081265449524, "learning_rate": 5e-07, "loss": -0.0346, "step": 1291 }, { "clip_ratio/high_max": 0.0020967185737390537, "clip_ratio/high_mean": 0.000783797811891418, "clip_ratio/low_mean": 0.0005060906316884939, "clip_ratio/low_min": 2.6189687559963204e-05, "clip_ratio/region_mean": 0.0012898884669994004, "epoch": 13.410495626822158, "grad_norm": 0.1508316844701767, "learning_rate": 5e-07, "loss": -0.0086, "step": 1292 }, { "clip_ratio/high_max": 0.0017263705412915442, "clip_ratio/high_mean": 0.0006904136789671611, "clip_ratio/low_mean": 0.0004967895310983295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011872031864186283, "epoch": 13.419825072886297, "grad_norm": 0.14386476576328278, "learning_rate": 5e-07, "loss": -0.0188, "step": 1293 }, { "clip_ratio/high_max": 0.002304025321791414, "clip_ratio/high_mean": 0.0008212882748921402, "clip_ratio/low_mean": 0.0005046060782660788, "clip_ratio/low_min": 4.878999789070804e-05, "clip_ratio/region_mean": 0.0013258943799883127, "epoch": 13.429154518950437, "grad_norm": 0.15589196979999542, "learning_rate": 5e-07, "loss": -0.0342, "step": 1294 }, { "clip_ratio/high_max": 0.0016660071669321042, "clip_ratio/high_mean": 0.0007897726318333298, "clip_ratio/low_mean": 0.0005810083243886766, "clip_ratio/low_min": 2.9624977287312504e-05, "clip_ratio/region_mean": 0.001370780973957153, "epoch": 13.438483965014576, "grad_norm": 0.1352260559797287, "learning_rate": 5e-07, "loss": -0.0159, "step": 1295 }, { "clip_ratio/high_max": 0.002230285870609805, "clip_ratio/high_mean": 0.0008831085615383927, "clip_ratio/low_mean": 0.0005043219673552812, "clip_ratio/low_min": 1.581877950229682e-05, "clip_ratio/region_mean": 0.0013874305368517525, "epoch": 13.447813411078718, "grad_norm": 0.15146781504154205, "learning_rate": 5e-07, "loss": -0.0194, "step": 1296 }, { "clip_ratio/high_max": 0.0017966319719562307, "clip_ratio/high_mean": 0.0007629532874489087, "clip_ratio/low_mean": 0.0005276474162201339, "clip_ratio/low_min": 2.9620852728839964e-05, "clip_ratio/region_mean": 0.0012906007086712634, "epoch": 13.457142857142857, "grad_norm": 0.14297908544540405, "learning_rate": 5e-07, "loss": -0.0357, "step": 1297 }, { "clip_ratio/high_max": 0.001729075753246434, "clip_ratio/high_mean": 0.0006853404420326115, "clip_ratio/low_mean": 0.0006226294080988737, "clip_ratio/low_min": 2.1118432414368726e-05, "clip_ratio/region_mean": 0.0013079698437650222, "epoch": 13.466472303206997, "grad_norm": 0.23539680242538452, "learning_rate": 5e-07, "loss": -0.0079, "step": 1298 }, { "clip_ratio/high_max": 0.001783670039003482, "clip_ratio/high_mean": 0.000769885975842044, "clip_ratio/low_mean": 0.000555118181182479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013250041702121962, "epoch": 13.475801749271136, "grad_norm": 3.7353920936584473, "learning_rate": 5e-07, "loss": -0.0045, "step": 1299 }, { "clip_ratio/high_max": 0.0020127926909481175, "clip_ratio/high_mean": 0.0007388345384242712, "clip_ratio/low_mean": 0.0005717942262890574, "clip_ratio/low_min": 1.5203113434836268e-05, "clip_ratio/region_mean": 0.0013106287478876766, "epoch": 13.485131195335278, "grad_norm": 0.15436038374900818, "learning_rate": 5e-07, "loss": 0.0083, "step": 1300 }, { "clip_ratio/high_max": 0.001966823350812774, "clip_ratio/high_mean": 0.0008152875361702172, "clip_ratio/low_mean": 0.0005357966533665603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013510841872630408, "epoch": 13.494460641399417, "grad_norm": 0.13294821977615356, "learning_rate": 5e-07, "loss": -0.0369, "step": 1301 }, { "clip_ratio/high_max": 0.0016792190435808152, "clip_ratio/high_mean": 0.0007699639627389843, "clip_ratio/low_mean": 0.0006466347376772319, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001416598679497838, "epoch": 13.503790087463557, "grad_norm": 0.13744574785232544, "learning_rate": 5e-07, "loss": 0.0163, "step": 1302 }, { "clip_ratio/high_max": 0.0019428228952165227, "clip_ratio/high_mean": 0.000683531170579954, "clip_ratio/low_mean": 0.0005690707293979358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001252601923624752, "epoch": 13.513119533527696, "grad_norm": 0.1487922966480255, "learning_rate": 5e-07, "loss": 0.043, "step": 1303 }, { "clip_ratio/high_max": 0.0019123920719721355, "clip_ratio/high_mean": 0.0007069975895319658, "clip_ratio/low_mean": 0.0006694599742331775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013764575451205019, "epoch": 13.522448979591836, "grad_norm": 0.1385929435491562, "learning_rate": 5e-07, "loss": 0.0156, "step": 1304 }, { "clip_ratio/high_max": 0.001990798133192584, "clip_ratio/high_mean": 0.0007916807262517978, "clip_ratio/low_mean": 0.0005080342225483037, "clip_ratio/low_min": 1.1396790796425194e-05, "clip_ratio/region_mean": 0.001299714931519702, "epoch": 13.531778425655977, "grad_norm": 0.13964475691318512, "learning_rate": 5e-07, "loss": -0.0139, "step": 1305 }, { "clip_ratio/high_max": 0.0020031607564305887, "clip_ratio/high_mean": 0.0007550640148110688, "clip_ratio/low_mean": 0.0006755793872343929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001430643409548793, "epoch": 13.541107871720117, "grad_norm": 0.14062552154064178, "learning_rate": 5e-07, "loss": -0.004, "step": 1306 }, { "clip_ratio/high_max": 0.001999953943595756, "clip_ratio/high_mean": 0.0008143400518747512, "clip_ratio/low_mean": 0.000581581379265117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001395921401126543, "epoch": 13.550437317784256, "grad_norm": 0.15786069631576538, "learning_rate": 5e-07, "loss": -0.0195, "step": 1307 }, { "clip_ratio/high_max": 0.0017756987654138356, "clip_ratio/high_mean": 0.0007499902039853623, "clip_ratio/low_mean": 0.000562363237804675, "clip_ratio/low_min": 1.9555694962036796e-05, "clip_ratio/region_mean": 0.0013123534263286274, "epoch": 13.559766763848396, "grad_norm": 0.13173966109752655, "learning_rate": 5e-07, "loss": -0.039, "step": 1308 }, { "clip_ratio/high_max": 0.002263301648781635, "clip_ratio/high_mean": 0.0008914486661524279, "clip_ratio/low_mean": 0.0006472668774222257, "clip_ratio/low_min": 1.4504525097436272e-05, "clip_ratio/region_mean": 0.0015387155253847595, "epoch": 13.569096209912537, "grad_norm": 0.13642564415931702, "learning_rate": 5e-07, "loss": -0.0199, "step": 1309 }, { "clip_ratio/high_max": 0.0017381226316501852, "clip_ratio/high_mean": 0.0007302567864826415, "clip_ratio/low_mean": 0.0005992834107928502, "clip_ratio/low_min": 1.316621001024032e-05, "clip_ratio/region_mean": 0.0013295401658979245, "epoch": 13.578425655976677, "grad_norm": 0.17860233783721924, "learning_rate": 5e-07, "loss": 0.0018, "step": 1310 }, { "clip_ratio/high_max": 0.002026752663368825, "clip_ratio/high_mean": 0.0008474503265460953, "clip_ratio/low_mean": 0.0006217968084456515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014692471486341674, "epoch": 13.587755102040816, "grad_norm": 0.14607638120651245, "learning_rate": 5e-07, "loss": -0.0093, "step": 1311 }, { "clip_ratio/high_max": 0.001773301359207835, "clip_ratio/high_mean": 0.0006702168193442049, "clip_ratio/low_mean": 0.0007508903836423997, "clip_ratio/low_min": 1.7846945411292836e-05, "clip_ratio/region_mean": 0.0014211071902536787, "epoch": 13.597084548104956, "grad_norm": 0.15476107597351074, "learning_rate": 5e-07, "loss": 0.043, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0499441964285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 755.6544799804688, "completions/mean_terminated_length": 580.0533447265625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 13.606413994169095, "grad_norm": 0.16009904444217682, "learning_rate": 5e-07, "loss": -0.04, "num_tokens": 786409920.0, "reward": 0.6363700032234192, "reward_std": 0.15410655736923218, "rewards/simpleverify_reward/mean": 0.6363700032234192, "rewards/simpleverify_reward/std": 0.4810522794723511, "step": 1313 }, { "clip_ratio/high_max": 0.0017780927228159271, "clip_ratio/high_mean": 0.0006330597843771102, "clip_ratio/low_mean": 0.0004288762756914366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010619360709824832, "epoch": 13.615743440233237, "grad_norm": 0.13288332521915436, "learning_rate": 5e-07, "loss": 0.0073, "step": 1314 }, { "clip_ratio/high_max": 0.0018586488768050913, "clip_ratio/high_mean": 0.0007398831403406803, "clip_ratio/low_mean": 0.00045340989163378254, "clip_ratio/low_min": 1.1737089153029956e-05, "clip_ratio/region_mean": 0.0011932930246985052, "epoch": 13.625072886297376, "grad_norm": 0.14271877706050873, "learning_rate": 5e-07, "loss": -0.0422, "step": 1315 }, { "clip_ratio/high_max": 0.0021416136223706417, "clip_ratio/high_mean": 0.000730059950001305, "clip_ratio/low_mean": 0.0004620300478563877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011920899705728516, "epoch": 13.634402332361516, "grad_norm": 0.14251582324504852, "learning_rate": 5e-07, "loss": -0.0564, "step": 1316 }, { "clip_ratio/high_max": 0.002107983935275115, "clip_ratio/high_mean": 0.0007721620750089642, "clip_ratio/low_mean": 0.0004894946487183915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012616567037184723, "epoch": 13.643731778425655, "grad_norm": 0.17988498508930206, "learning_rate": 5e-07, "loss": -0.0014, "step": 1317 }, { "clip_ratio/high_max": 0.0018582206139399204, "clip_ratio/high_mean": 0.0006770815580239287, "clip_ratio/low_mean": 0.0005014313346691779, "clip_ratio/low_min": 1.5184645235422067e-05, "clip_ratio/region_mean": 0.0011785128917836118, "epoch": 13.653061224489797, "grad_norm": 0.1574021577835083, "learning_rate": 5e-07, "loss": 0.0005, "step": 1318 }, { "clip_ratio/high_max": 0.0019941211576224305, "clip_ratio/high_mean": 0.0008277340748463757, "clip_ratio/low_mean": 0.00046547511192329694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012932091631228104, "epoch": 13.662390670553936, "grad_norm": 0.14830724895000458, "learning_rate": 5e-07, "loss": -0.0254, "step": 1319 }, { "clip_ratio/high_max": 0.0016749045462347567, "clip_ratio/high_mean": 0.0006611879161937395, "clip_ratio/low_mean": 0.0005682229721060139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001229410892847227, "epoch": 13.671720116618076, "grad_norm": 0.1476219892501831, "learning_rate": 5e-07, "loss": -0.0043, "step": 1320 }, { "clip_ratio/high_max": 0.0016367916941817384, "clip_ratio/high_mean": 0.0005812190515825932, "clip_ratio/low_mean": 0.0005350616111172712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011162806404172443, "epoch": 13.681049562682215, "grad_norm": 0.13913817703723907, "learning_rate": 5e-07, "loss": -0.001, "step": 1321 }, { "clip_ratio/high_max": 0.001647822282393463, "clip_ratio/high_mean": 0.0006820405196776846, "clip_ratio/low_mean": 0.0005473520309351443, "clip_ratio/low_min": 2.9983928470755927e-05, "clip_ratio/region_mean": 0.0012293925556150498, "epoch": 13.690379008746355, "grad_norm": 0.25654521584510803, "learning_rate": 5e-07, "loss": -0.0173, "step": 1322 }, { "clip_ratio/high_max": 0.001896274188766256, "clip_ratio/high_mean": 0.0006440755623771111, "clip_ratio/low_mean": 0.0004875478662142996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011316234522382729, "epoch": 13.699708454810496, "grad_norm": 0.13212744891643524, "learning_rate": 5e-07, "loss": -0.004, "step": 1323 }, { "clip_ratio/high_max": 0.0017474826709076297, "clip_ratio/high_mean": 0.0006339352539725951, "clip_ratio/low_mean": 0.0005824405234307051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001216375814692583, "epoch": 13.709037900874636, "grad_norm": 0.1541476994752884, "learning_rate": 5e-07, "loss": 0.0649, "step": 1324 }, { "clip_ratio/high_max": 0.0021879502673982643, "clip_ratio/high_mean": 0.0007002842448855517, "clip_ratio/low_mean": 0.0005082926363684237, "clip_ratio/low_min": 1.848018837335985e-05, "clip_ratio/region_mean": 0.0012085768685210496, "epoch": 13.718367346938775, "grad_norm": 0.13502225279808044, "learning_rate": 5e-07, "loss": -0.022, "step": 1325 }, { "clip_ratio/high_max": 0.0021302123604982626, "clip_ratio/high_mean": 0.0007498971881432226, "clip_ratio/low_mean": 0.0005638701595671591, "clip_ratio/low_min": 1.230557154485723e-05, "clip_ratio/region_mean": 0.0013137673122400884, "epoch": 13.727696793002915, "grad_norm": 0.1443113088607788, "learning_rate": 5e-07, "loss": -0.0222, "step": 1326 }, { "clip_ratio/high_max": 0.0018637128814589232, "clip_ratio/high_mean": 0.0007582244616060052, "clip_ratio/low_mean": 0.0006456148248616955, "clip_ratio/low_min": 3.145463051623665e-05, "clip_ratio/region_mean": 0.0014038392801012378, "epoch": 13.737026239067056, "grad_norm": 0.1323981136083603, "learning_rate": 5e-07, "loss": -0.0206, "step": 1327 }, { "clip_ratio/high_max": 0.0017199804788106121, "clip_ratio/high_mean": 0.0006984443734836532, "clip_ratio/low_mean": 0.0005330583780960296, "clip_ratio/low_min": 6.56541724310955e-05, "clip_ratio/region_mean": 0.0012315027379372623, "epoch": 13.746355685131196, "grad_norm": 0.1295733004808426, "learning_rate": 5e-07, "loss": -0.0399, "step": 1328 }, { "clip_ratio/high_max": 0.0020609854254871607, "clip_ratio/high_mean": 0.0008141679882101016, "clip_ratio/low_mean": 0.0005793031996290665, "clip_ratio/low_min": 1.2281390809221193e-05, "clip_ratio/region_mean": 0.0013934711860201787, "epoch": 13.755685131195335, "grad_norm": 0.14503847062587738, "learning_rate": 5e-07, "loss": -0.0436, "step": 1329 }, { "clip_ratio/high_max": 0.0021219095797277987, "clip_ratio/high_mean": 0.0007692198241784354, "clip_ratio/low_mean": 0.000576449159780168, "clip_ratio/low_min": 5.323183904692996e-05, "clip_ratio/region_mean": 0.001345668966678204, "epoch": 13.765014577259475, "grad_norm": 0.13942258059978485, "learning_rate": 5e-07, "loss": -0.0202, "step": 1330 }, { "clip_ratio/high_max": 0.00187624871978187, "clip_ratio/high_mean": 0.0007702975981374038, "clip_ratio/low_mean": 0.0006927269660081947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014630246078013442, "epoch": 13.774344023323614, "grad_norm": 0.15189923346042633, "learning_rate": 5e-07, "loss": -0.0075, "step": 1331 }, { "clip_ratio/high_max": 0.0017356250173179433, "clip_ratio/high_mean": 0.0007230879818962421, "clip_ratio/low_mean": 0.0006613582718273392, "clip_ratio/low_min": 5.721025536331581e-05, "clip_ratio/region_mean": 0.0013844462482666131, "epoch": 13.783673469387756, "grad_norm": 0.1269250363111496, "learning_rate": 5e-07, "loss": 0.0042, "step": 1332 }, { "clip_ratio/high_max": 0.0018111979006789625, "clip_ratio/high_mean": 0.0007932592125143856, "clip_ratio/low_mean": 0.0004905139899165079, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012837732065236196, "epoch": 13.793002915451895, "grad_norm": 0.11971694231033325, "learning_rate": 5e-07, "loss": -0.0379, "step": 1333 }, { "clip_ratio/high_max": 0.00167117954515561, "clip_ratio/high_mean": 0.0006664037791779265, "clip_ratio/low_mean": 0.0006736805744367302, "clip_ratio/low_min": 1.9592476746765897e-05, "clip_ratio/region_mean": 0.001340084334515268, "epoch": 13.802332361516035, "grad_norm": 0.15565913915634155, "learning_rate": 5e-07, "loss": 0.0005, "step": 1334 }, { "clip_ratio/high_max": 0.001583807072165655, "clip_ratio/high_mean": 0.0006241048208721622, "clip_ratio/low_mean": 0.0007368537208094494, "clip_ratio/low_min": 2.4380729882977903e-05, "clip_ratio/region_mean": 0.001360958551231306, "epoch": 13.811661807580174, "grad_norm": 0.15386252105236053, "learning_rate": 5e-07, "loss": 0.004, "step": 1335 }, { "clip_ratio/high_max": 0.002098778073559515, "clip_ratio/high_mean": 0.0008942113800003426, "clip_ratio/low_mean": 0.0007265634922077879, "clip_ratio/low_min": 6.47617234790232e-05, "clip_ratio/region_mean": 0.0016207748994929716, "epoch": 13.820991253644316, "grad_norm": 0.15695734322071075, "learning_rate": 5e-07, "loss": -0.0359, "step": 1336 }, { "clip_ratio/high_max": 0.0017996503993344959, "clip_ratio/high_mean": 0.000605410441494314, "clip_ratio/low_mean": 0.0007104945489118109, "clip_ratio/low_min": 2.709146065171808e-05, "clip_ratio/region_mean": 0.0013159049922251143, "epoch": 13.830320699708455, "grad_norm": 0.1348036527633667, "learning_rate": 5e-07, "loss": 0.0131, "step": 1337 }, { "clip_ratio/high_max": 0.002056600969808642, "clip_ratio/high_mean": 0.0008593768270657165, "clip_ratio/low_mean": 0.0007296396652236581, "clip_ratio/low_min": 1.391052774124546e-05, "clip_ratio/region_mean": 0.0015890164868324064, "epoch": 13.839650145772595, "grad_norm": 0.15909375250339508, "learning_rate": 5e-07, "loss": -0.0089, "step": 1338 }, { "clip_ratio/high_max": 0.0015003066982899327, "clip_ratio/high_mean": 0.0005994949242449366, "clip_ratio/low_mean": 0.0006834750365669606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001282969955354929, "epoch": 13.848979591836734, "grad_norm": 0.13473115861415863, "learning_rate": 5e-07, "loss": 0.0119, "step": 1339 }, { "clip_ratio/high_max": 0.0019765912657021545, "clip_ratio/high_mean": 0.0007196498263510875, "clip_ratio/low_mean": 0.0005649204267683672, "clip_ratio/low_min": 3.648170059022959e-05, "clip_ratio/region_mean": 0.0012845702622144017, "epoch": 13.858309037900874, "grad_norm": 0.16498315334320068, "learning_rate": 5e-07, "loss": -0.0484, "step": 1340 }, { "clip_ratio/high_max": 0.0020443896173674148, "clip_ratio/high_mean": 0.000850721195092774, "clip_ratio/low_mean": 0.0005760517306043766, "clip_ratio/low_min": 8.963143955043051e-06, "clip_ratio/region_mean": 0.0014267729384300765, "epoch": 13.867638483965015, "grad_norm": 0.1411380022764206, "learning_rate": 5e-07, "loss": -0.0426, "step": 1341 }, { "clip_ratio/high_max": 0.0018677731059142388, "clip_ratio/high_mean": 0.0007750891127216164, "clip_ratio/low_mean": 0.0006185431338963099, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001393632213876117, "epoch": 13.876967930029155, "grad_norm": 0.1397019922733307, "learning_rate": 5e-07, "loss": -0.0291, "step": 1342 }, { "clip_ratio/high_max": 0.0020237600183463655, "clip_ratio/high_mean": 0.0007991323818714591, "clip_ratio/low_mean": 0.0007627958693774417, "clip_ratio/low_min": 3.135189399472438e-05, "clip_ratio/region_mean": 0.0015619282166881021, "epoch": 13.886297376093294, "grad_norm": 0.1339261680841446, "learning_rate": 5e-07, "loss": 0.0104, "step": 1343 }, { "clip_ratio/high_max": 0.0018707907911448274, "clip_ratio/high_mean": 0.0007013859294602298, "clip_ratio/low_mean": 0.0006935396522749215, "clip_ratio/low_min": 1.2873326340923086e-05, "clip_ratio/region_mean": 0.0013949255953775719, "epoch": 13.895626822157434, "grad_norm": 0.15653087198734283, "learning_rate": 5e-07, "loss": 0.0164, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047572544642857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 739.8426513671875, "completions/mean_terminated_length": 572.2068481445312, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 14.00932944606414, "grad_norm": 0.13902561366558075, "learning_rate": 5e-07, "loss": -0.0157, "num_tokens": 804872281.0, "reward": 0.6514718532562256, "reward_std": 0.15223561227321625, "rewards/simpleverify_reward/mean": 0.6514717936515808, "rewards/simpleverify_reward/std": 0.4765125513076782, "step": 1345 }, { "clip_ratio/high_max": 0.0020683754846686497, "clip_ratio/high_mean": 0.0008563667397538666, "clip_ratio/low_mean": 0.00043137512875546236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012877418885182124, "epoch": 14.018658892128279, "grad_norm": 0.1369263082742691, "learning_rate": 5e-07, "loss": -0.0536, "step": 1346 }, { "clip_ratio/high_max": 0.002131165274477098, "clip_ratio/high_mean": 0.0007021867859293707, "clip_ratio/low_mean": 0.00048486733612662647, "clip_ratio/low_min": 1.1593396266107447e-05, "clip_ratio/region_mean": 0.0011870541238749865, "epoch": 14.02798833819242, "grad_norm": 0.12866684794425964, "learning_rate": 5e-07, "loss": -0.0259, "step": 1347 }, { "clip_ratio/high_max": 0.0019715811067726463, "clip_ratio/high_mean": 0.0006913059514772613, "clip_ratio/low_mean": 0.0004008952209915151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010922011679213028, "epoch": 14.03731778425656, "grad_norm": 0.1576058566570282, "learning_rate": 5e-07, "loss": -0.025, "step": 1348 }, { "clip_ratio/high_max": 0.002151768498151796, "clip_ratio/high_mean": 0.0007912145720183617, "clip_ratio/low_mean": 0.00043285535321047064, "clip_ratio/low_min": 9.661462172516622e-06, "clip_ratio/region_mean": 0.001224069928866811, "epoch": 14.0466472303207, "grad_norm": 0.16631673276424408, "learning_rate": 5e-07, "loss": -0.023, "step": 1349 }, { "clip_ratio/high_max": 0.0018739083425316494, "clip_ratio/high_mean": 0.0006382819246937288, "clip_ratio/low_mean": 0.0004588566998791066, "clip_ratio/low_min": 1.3351847883313894e-05, "clip_ratio/region_mean": 0.0010971386109304149, "epoch": 14.055976676384839, "grad_norm": 0.14610406756401062, "learning_rate": 5e-07, "loss": -0.0427, "step": 1350 }, { "clip_ratio/high_max": 0.002097486016282346, "clip_ratio/high_mean": 0.0007763486028125044, "clip_ratio/low_mean": 0.0005108239442961349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012871725521108601, "epoch": 14.06530612244898, "grad_norm": 0.13860712945461273, "learning_rate": 5e-07, "loss": -0.0318, "step": 1351 }, { "clip_ratio/high_max": 0.002181212905270513, "clip_ratio/high_mean": 0.0007987826302269241, "clip_ratio/low_mean": 0.0005692209415428806, "clip_ratio/low_min": 1.2209416127006989e-05, "clip_ratio/region_mean": 0.0013680035663128365, "epoch": 14.07463556851312, "grad_norm": 0.15873385965824127, "learning_rate": 5e-07, "loss": 0.0051, "step": 1352 }, { "clip_ratio/high_max": 0.00225450082871248, "clip_ratio/high_mean": 0.0007706741025685915, "clip_ratio/low_mean": 0.0004981693346053362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012688434471783694, "epoch": 14.08396501457726, "grad_norm": 0.14461420476436615, "learning_rate": 5e-07, "loss": -0.0315, "step": 1353 }, { "clip_ratio/high_max": 0.00202920604715473, "clip_ratio/high_mean": 0.0007270674723258708, "clip_ratio/low_mean": 0.0005213053937040968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001248372867848957, "epoch": 14.093294460641399, "grad_norm": 0.14506343007087708, "learning_rate": 5e-07, "loss": -0.0225, "step": 1354 }, { "clip_ratio/high_max": 0.0017133202345576137, "clip_ratio/high_mean": 0.0007221803316497244, "clip_ratio/low_mean": 0.00048578118867226294, "clip_ratio/low_min": 1.3435082109936047e-05, "clip_ratio/region_mean": 0.0012079615298716817, "epoch": 14.102623906705539, "grad_norm": 0.1461682766675949, "learning_rate": 5e-07, "loss": -0.0225, "step": 1355 }, { "clip_ratio/high_max": 0.00176513334736228, "clip_ratio/high_mean": 0.0006370151731971418, "clip_ratio/low_mean": 0.0005457769439090043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011827921371150296, "epoch": 14.11195335276968, "grad_norm": 0.17663006484508514, "learning_rate": 5e-07, "loss": 0.0148, "step": 1356 }, { "clip_ratio/high_max": 0.0018890992541855667, "clip_ratio/high_mean": 0.0007809143262420548, "clip_ratio/low_mean": 0.0006557202086696634, "clip_ratio/low_min": 4.3572240429057274e-05, "clip_ratio/region_mean": 0.0014366345276357606, "epoch": 14.12128279883382, "grad_norm": 0.1513398438692093, "learning_rate": 5e-07, "loss": -0.0044, "step": 1357 }, { "clip_ratio/high_max": 0.001910109851451125, "clip_ratio/high_mean": 0.0007914250609246665, "clip_ratio/low_mean": 0.0005374810425564647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013289060480019543, "epoch": 14.130612244897959, "grad_norm": 0.13544178009033203, "learning_rate": 5e-07, "loss": -0.0605, "step": 1358 }, { "clip_ratio/high_max": 0.0017447670543333516, "clip_ratio/high_mean": 0.0007334696056204848, "clip_ratio/low_mean": 0.0006947290621610591, "clip_ratio/low_min": 6.233638305275235e-05, "clip_ratio/region_mean": 0.0014281986223068088, "epoch": 14.139941690962099, "grad_norm": 0.1642685830593109, "learning_rate": 5e-07, "loss": 0.0038, "step": 1359 }, { "clip_ratio/high_max": 0.002021109343331773, "clip_ratio/high_mean": 0.0008206393922591815, "clip_ratio/low_mean": 0.0006001429655952961, "clip_ratio/low_min": 1.3898154065827839e-05, "clip_ratio/region_mean": 0.0014207823478500359, "epoch": 14.14927113702624, "grad_norm": 0.1679585576057434, "learning_rate": 5e-07, "loss": -0.0003, "step": 1360 }, { "clip_ratio/high_max": 0.0012986520014237612, "clip_ratio/high_mean": 0.00047509153591818176, "clip_ratio/low_mean": 0.0006811776147515047, "clip_ratio/low_min": 1.7020696759573184e-05, "clip_ratio/region_mean": 0.0011562691543076653, "epoch": 14.15860058309038, "grad_norm": 0.14426560699939728, "learning_rate": 5e-07, "loss": 0.0141, "step": 1361 }, { "clip_ratio/high_max": 0.001981216235435568, "clip_ratio/high_mean": 0.0007208238475868711, "clip_ratio/low_mean": 0.0006539342302858131, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013747580524068326, "epoch": 14.167930029154519, "grad_norm": 0.15823277831077576, "learning_rate": 5e-07, "loss": -0.0313, "step": 1362 }, { "clip_ratio/high_max": 0.0018850402702810243, "clip_ratio/high_mean": 0.0007316774963328498, "clip_ratio/low_mean": 0.0005234715863480233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012551490472105797, "epoch": 14.177259475218658, "grad_norm": 0.14078326523303986, "learning_rate": 5e-07, "loss": -0.0209, "step": 1363 }, { "clip_ratio/high_max": 0.0018999108760908712, "clip_ratio/high_mean": 0.0007796508534738678, "clip_ratio/low_mean": 0.0006995195635681739, "clip_ratio/low_min": 1.8032313164439984e-05, "clip_ratio/region_mean": 0.0014791703797527589, "epoch": 14.186588921282798, "grad_norm": 0.13885757327079773, "learning_rate": 5e-07, "loss": -0.0154, "step": 1364 }, { "clip_ratio/high_max": 0.0020468212896957994, "clip_ratio/high_mean": 0.000840089493067353, "clip_ratio/low_mean": 0.0007319219585042447, "clip_ratio/low_min": 5.670219979947433e-05, "clip_ratio/region_mean": 0.0015720114715804812, "epoch": 14.19591836734694, "grad_norm": 0.1572674810886383, "learning_rate": 5e-07, "loss": -0.0446, "step": 1365 }, { "clip_ratio/high_max": 0.0016930835045059212, "clip_ratio/high_mean": 0.0007161204739531968, "clip_ratio/low_mean": 0.0006726201590936398, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013887406457797624, "epoch": 14.205247813411079, "grad_norm": 0.14489062130451202, "learning_rate": 5e-07, "loss": 0.0199, "step": 1366 }, { "clip_ratio/high_max": 0.0017370920832036063, "clip_ratio/high_mean": 0.0006639273215114372, "clip_ratio/low_mean": 0.0005674314206771669, "clip_ratio/low_min": 2.8191248929942958e-05, "clip_ratio/region_mean": 0.0012313587249082047, "epoch": 14.214577259475218, "grad_norm": 0.1457103192806244, "learning_rate": 5e-07, "loss": 0.0033, "step": 1367 }, { "clip_ratio/high_max": 0.0021014542944612913, "clip_ratio/high_mean": 0.0008079961935436586, "clip_ratio/low_mean": 0.0007061346605041763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015141308831516653, "epoch": 14.223906705539358, "grad_norm": 0.14718717336654663, "learning_rate": 5e-07, "loss": -0.0048, "step": 1368 }, { "clip_ratio/high_max": 0.0016814807604532689, "clip_ratio/high_mean": 0.0007234388067445252, "clip_ratio/low_mean": 0.0005992110418446828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013226498740550596, "epoch": 14.2332361516035, "grad_norm": 0.13984902203083038, "learning_rate": 5e-07, "loss": -0.0093, "step": 1369 }, { "clip_ratio/high_max": 0.0020641638548113406, "clip_ratio/high_mean": 0.0007546481610916089, "clip_ratio/low_mean": 0.0007339928251894889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001488640955358278, "epoch": 14.242565597667639, "grad_norm": 0.1663520634174347, "learning_rate": 5e-07, "loss": -0.013, "step": 1370 }, { "clip_ratio/high_max": 0.0019430443426244892, "clip_ratio/high_mean": 0.0008447165237157606, "clip_ratio/low_mean": 0.0005813451571157202, "clip_ratio/low_min": 2.3032531316857785e-05, "clip_ratio/region_mean": 0.0014260616844694596, "epoch": 14.251895043731778, "grad_norm": 0.12052075564861298, "learning_rate": 5e-07, "loss": -0.0452, "step": 1371 }, { "clip_ratio/high_max": 0.001973275837372057, "clip_ratio/high_mean": 0.0008123340376187116, "clip_ratio/low_mean": 0.0008067775361269014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016191115355468355, "epoch": 14.261224489795918, "grad_norm": 0.1579110026359558, "learning_rate": 5e-07, "loss": 0.0023, "step": 1372 }, { "clip_ratio/high_max": 0.001847626881499309, "clip_ratio/high_mean": 0.0007311796289286576, "clip_ratio/low_mean": 0.0007521607258240692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014833403765805997, "epoch": 14.270553935860057, "grad_norm": 0.14381858706474304, "learning_rate": 5e-07, "loss": 0.0021, "step": 1373 }, { "clip_ratio/high_max": 0.0014450514172494877, "clip_ratio/high_mean": 0.0006467929761129199, "clip_ratio/low_mean": 0.0005236509691712854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011704439675668254, "epoch": 14.279883381924199, "grad_norm": 0.18712426722049713, "learning_rate": 5e-07, "loss": -0.0253, "step": 1374 }, { "clip_ratio/high_max": 0.001918302870763, "clip_ratio/high_mean": 0.0007689950580243021, "clip_ratio/low_mean": 0.0005466639377118554, "clip_ratio/low_min": 1.6796560885268264e-05, "clip_ratio/region_mean": 0.0013156589593563695, "epoch": 14.289212827988338, "grad_norm": 0.14740996062755585, "learning_rate": 5e-07, "loss": -0.0072, "step": 1375 }, { "clip_ratio/high_max": 0.0019393369948375039, "clip_ratio/high_mean": 0.0007844365100027062, "clip_ratio/low_mean": 0.00058943389376509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013738704219576903, "epoch": 14.298542274052478, "grad_norm": 0.13411155343055725, "learning_rate": 5e-07, "loss": -0.0047, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0494559151785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 744.3768310546875, "completions/mean_terminated_length": 569.9950561523438, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 14.307871720116617, "grad_norm": 0.16154013574123383, "learning_rate": 5e-07, "loss": -0.0521, "num_tokens": 823263190.0, "reward": 0.6390555500984192, "reward_std": 0.16075482964515686, "rewards/simpleverify_reward/mean": 0.6390555500984192, "rewards/simpleverify_reward/std": 0.48028287291526794, "step": 1377 }, { "clip_ratio/high_max": 0.0016383336114813574, "clip_ratio/high_mean": 0.0006736801897204714, "clip_ratio/low_mean": 0.00042780331818903505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00110148352359829, "epoch": 14.317201166180759, "grad_norm": 0.14795222878456116, "learning_rate": 5e-07, "loss": -0.0167, "step": 1378 }, { "clip_ratio/high_max": 0.0020514386778813787, "clip_ratio/high_mean": 0.0008300420158775523, "clip_ratio/low_mean": 0.000563666838388599, "clip_ratio/low_min": 2.606898488011211e-05, "clip_ratio/region_mean": 0.0013937088588136248, "epoch": 14.326530612244898, "grad_norm": 0.16053931415081024, "learning_rate": 5e-07, "loss": -0.0459, "step": 1379 }, { "clip_ratio/high_max": 0.0020812212460441515, "clip_ratio/high_mean": 0.0007848581008147448, "clip_ratio/low_mean": 0.0004270363278919831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001211894446896622, "epoch": 14.335860058309038, "grad_norm": 0.14361362159252167, "learning_rate": 5e-07, "loss": -0.0596, "step": 1380 }, { "clip_ratio/high_max": 0.0018166306399507448, "clip_ratio/high_mean": 0.0007041022527118912, "clip_ratio/low_mean": 0.0005655826553265797, "clip_ratio/low_min": 1.2854792657890357e-05, "clip_ratio/region_mean": 0.0012696848789346404, "epoch": 14.345189504373177, "grad_norm": 0.14964520931243896, "learning_rate": 5e-07, "loss": -0.027, "step": 1381 }, { "clip_ratio/high_max": 0.001747173366311472, "clip_ratio/high_mean": 0.0007213266999315238, "clip_ratio/low_mean": 0.0005226894345469191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001244016137206927, "epoch": 14.354518950437317, "grad_norm": 0.1818731129169464, "learning_rate": 5e-07, "loss": 0.0065, "step": 1382 }, { "clip_ratio/high_max": 0.0020761165869771503, "clip_ratio/high_mean": 0.000844153775688028, "clip_ratio/low_mean": 0.0004864827496930957, "clip_ratio/low_min": 2.8841210223617963e-05, "clip_ratio/region_mean": 0.0013306365199241554, "epoch": 14.363848396501458, "grad_norm": 0.1444423496723175, "learning_rate": 5e-07, "loss": -0.0546, "step": 1383 }, { "clip_ratio/high_max": 0.002241321919427719, "clip_ratio/high_mean": 0.0009169743279926479, "clip_ratio/low_mean": 0.0005945247858107905, "clip_ratio/low_min": 1.461646388634108e-05, "clip_ratio/region_mean": 0.0015114991329028271, "epoch": 14.373177842565598, "grad_norm": 0.17745636403560638, "learning_rate": 5e-07, "loss": -0.0027, "step": 1384 }, { "clip_ratio/high_max": 0.001772977688233368, "clip_ratio/high_mean": 0.0007893034708104096, "clip_ratio/low_mean": 0.00039956709360922105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011888705776073039, "epoch": 14.382507288629737, "grad_norm": 0.14529049396514893, "learning_rate": 5e-07, "loss": -0.0418, "step": 1385 }, { "clip_ratio/high_max": 0.0021247365257295314, "clip_ratio/high_mean": 0.0008590847028244752, "clip_ratio/low_mean": 0.0005674549261129869, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00142653962757322, "epoch": 14.391836734693877, "grad_norm": 0.14662665128707886, "learning_rate": 5e-07, "loss": -0.0369, "step": 1386 }, { "clip_ratio/high_max": 0.0020029033803439233, "clip_ratio/high_mean": 0.0007551080780103803, "clip_ratio/low_mean": 0.0004936785871905158, "clip_ratio/low_min": 1.5830801203264855e-05, "clip_ratio/region_mean": 0.0012487866297306027, "epoch": 14.401166180758018, "grad_norm": 0.14219318330287933, "learning_rate": 5e-07, "loss": -0.0252, "step": 1387 }, { "clip_ratio/high_max": 0.0019594343357312027, "clip_ratio/high_mean": 0.000809772354841698, "clip_ratio/low_mean": 0.0005697490687452955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013795214581477921, "epoch": 14.410495626822158, "grad_norm": 0.15110574662685394, "learning_rate": 5e-07, "loss": -0.0463, "step": 1388 }, { "clip_ratio/high_max": 0.0020655262414948083, "clip_ratio/high_mean": 0.0007970095266500721, "clip_ratio/low_mean": 0.0005185476379665488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013155571723473258, "epoch": 14.419825072886297, "grad_norm": 0.16458779573440552, "learning_rate": 5e-07, "loss": -0.0454, "step": 1389 }, { "clip_ratio/high_max": 0.0018312575011805166, "clip_ratio/high_mean": 0.0007453100115526468, "clip_ratio/low_mean": 0.0006863019734737463, "clip_ratio/low_min": 1.3876554476155434e-05, "clip_ratio/region_mean": 0.0014316119777504355, "epoch": 14.429154518950437, "grad_norm": 0.17948304116725922, "learning_rate": 5e-07, "loss": 0.033, "step": 1390 }, { "clip_ratio/high_max": 0.0019352707313373685, "clip_ratio/high_mean": 0.000769567493989598, "clip_ratio/low_mean": 0.0006442316207539989, "clip_ratio/low_min": 3.4458140362403356e-05, "clip_ratio/region_mean": 0.0014137990801827982, "epoch": 14.438483965014576, "grad_norm": 0.1443287432193756, "learning_rate": 5e-07, "loss": -0.0085, "step": 1391 }, { "clip_ratio/high_max": 0.0019766806944971904, "clip_ratio/high_mean": 0.0006987767337705009, "clip_ratio/low_mean": 0.0006440942006520345, "clip_ratio/low_min": 2.914772085205186e-05, "clip_ratio/region_mean": 0.00134287093533203, "epoch": 14.447813411078718, "grad_norm": 0.1607384979724884, "learning_rate": 5e-07, "loss": 0.02, "step": 1392 }, { "clip_ratio/high_max": 0.0018027112091658637, "clip_ratio/high_mean": 0.0007639094237674726, "clip_ratio/low_mean": 0.0005990161262161564, "clip_ratio/low_min": 2.725936155911768e-05, "clip_ratio/region_mean": 0.00136292557726847, "epoch": 14.457142857142857, "grad_norm": 0.15762482583522797, "learning_rate": 5e-07, "loss": 0.005, "step": 1393 }, { "clip_ratio/high_max": 0.0017597553523955867, "clip_ratio/high_mean": 0.0007307438318093773, "clip_ratio/low_mean": 0.0005507215864781756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012814654473913833, "epoch": 14.466472303206997, "grad_norm": 0.12722429633140564, "learning_rate": 5e-07, "loss": -0.035, "step": 1394 }, { "clip_ratio/high_max": 0.0021705858889617957, "clip_ratio/high_mean": 0.0008352483873750316, "clip_ratio/low_mean": 0.0005327849567038356, "clip_ratio/low_min": 1.1340954188199248e-05, "clip_ratio/region_mean": 0.001368033372273203, "epoch": 14.475801749271136, "grad_norm": 0.14285461604595184, "learning_rate": 5e-07, "loss": -0.0368, "step": 1395 }, { "clip_ratio/high_max": 0.0017187315679620951, "clip_ratio/high_mean": 0.0007083421241986798, "clip_ratio/low_mean": 0.0006815245235429757, "clip_ratio/low_min": 2.4281865989905782e-05, "clip_ratio/region_mean": 0.0013898666293243878, "epoch": 14.485131195335278, "grad_norm": 0.14174261689186096, "learning_rate": 5e-07, "loss": 0.0101, "step": 1396 }, { "clip_ratio/high_max": 0.0019329160422785208, "clip_ratio/high_mean": 0.0007691861774219433, "clip_ratio/low_mean": 0.0007659903640160337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015351765396189876, "epoch": 14.494460641399417, "grad_norm": 0.19730588793754578, "learning_rate": 5e-07, "loss": 0.0161, "step": 1397 }, { "clip_ratio/high_max": 0.0019407533836783841, "clip_ratio/high_mean": 0.0007267417713592295, "clip_ratio/low_mean": 0.0006765011344214145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014032429207873065, "epoch": 14.503790087463557, "grad_norm": 0.20793960988521576, "learning_rate": 5e-07, "loss": 0.0176, "step": 1398 }, { "clip_ratio/high_max": 0.0021061936786281876, "clip_ratio/high_mean": 0.0007923213861431577, "clip_ratio/low_mean": 0.0005114076639074483, "clip_ratio/low_min": 1.4240145901567303e-05, "clip_ratio/region_mean": 0.0013037290809734259, "epoch": 14.513119533527696, "grad_norm": 0.13702943921089172, "learning_rate": 5e-07, "loss": -0.0289, "step": 1399 }, { "clip_ratio/high_max": 0.0016988787974696606, "clip_ratio/high_mean": 0.0006605909320569481, "clip_ratio/low_mean": 0.0007067375390761299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013673284447577316, "epoch": 14.522448979591836, "grad_norm": 0.14462846517562866, "learning_rate": 5e-07, "loss": 0.0152, "step": 1400 }, { "clip_ratio/high_max": 0.002064587144559482, "clip_ratio/high_mean": 0.0008607772142568137, "clip_ratio/low_mean": 0.000631545461146743, "clip_ratio/low_min": 3.9399886190949474e-05, "clip_ratio/region_mean": 0.001492322662670631, "epoch": 14.531778425655977, "grad_norm": 0.14725172519683838, "learning_rate": 5e-07, "loss": -0.0482, "step": 1401 }, { "clip_ratio/high_max": 0.0019268271935288794, "clip_ratio/high_mean": 0.0007891183904575882, "clip_ratio/low_mean": 0.0007006071009527659, "clip_ratio/low_min": 2.773947471723659e-05, "clip_ratio/region_mean": 0.0014897255023242906, "epoch": 14.541107871720117, "grad_norm": 0.1451755315065384, "learning_rate": 5e-07, "loss": -0.0125, "step": 1402 }, { "clip_ratio/high_max": 0.0018210216367151588, "clip_ratio/high_mean": 0.0007431662143062567, "clip_ratio/low_mean": 0.0006571032495230611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014002694733790122, "epoch": 14.550437317784256, "grad_norm": 0.14866766333580017, "learning_rate": 5e-07, "loss": 0.0069, "step": 1403 }, { "clip_ratio/high_max": 0.0024232780560851097, "clip_ratio/high_mean": 0.0009303739425376989, "clip_ratio/low_mean": 0.00061995174655749, "clip_ratio/low_min": 1.5367591913673095e-05, "clip_ratio/region_mean": 0.0015503256872761995, "epoch": 14.559766763848396, "grad_norm": 0.14477083086967468, "learning_rate": 5e-07, "loss": -0.0207, "step": 1404 }, { "clip_ratio/high_max": 0.0015612582392350305, "clip_ratio/high_mean": 0.0005899983152630739, "clip_ratio/low_mean": 0.00063557193698216, "clip_ratio/low_min": 3.4301159757887945e-05, "clip_ratio/region_mean": 0.0012255702895345166, "epoch": 14.569096209912537, "grad_norm": 0.16697290539741516, "learning_rate": 5e-07, "loss": 0.048, "step": 1405 }, { "clip_ratio/high_max": 0.001865966674813535, "clip_ratio/high_mean": 0.0007516646874137223, "clip_ratio/low_mean": 0.0007124642997951014, "clip_ratio/low_min": 2.1701389414374717e-05, "clip_ratio/region_mean": 0.0014641289744758978, "epoch": 14.578425655976677, "grad_norm": 0.16455332934856415, "learning_rate": 5e-07, "loss": 0.0133, "step": 1406 }, { "clip_ratio/high_max": 0.0021946478009340353, "clip_ratio/high_mean": 0.0008526764995622216, "clip_ratio/low_mean": 0.0006896909635543125, "clip_ratio/low_min": 1.9154153051204048e-05, "clip_ratio/region_mean": 0.001542367463116534, "epoch": 14.587755102040816, "grad_norm": 0.14440160989761353, "learning_rate": 5e-07, "loss": -0.0306, "step": 1407 }, { "clip_ratio/high_max": 0.0018153817400161643, "clip_ratio/high_mean": 0.0007176012277341215, "clip_ratio/low_mean": 0.0006614922722292249, "clip_ratio/low_min": 1.406707178830402e-05, "clip_ratio/region_mean": 0.0013790935518045444, "epoch": 14.597084548104956, "grad_norm": 0.1490182727575302, "learning_rate": 5e-07, "loss": 0.002, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049351283482142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 739.3182373046875, "completions/mean_terminated_length": 565.0618286132812, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 14.606413994169095, "grad_norm": 0.157324880361557, "learning_rate": 5e-07, "loss": 0.0239, "num_tokens": 841580033.0, "reward": 0.6443917751312256, "reward_std": 0.1516842246055603, "rewards/simpleverify_reward/mean": 0.6443917155265808, "rewards/simpleverify_reward/std": 0.4787055552005768, "step": 1409 }, { "clip_ratio/high_max": 0.0018253466114401817, "clip_ratio/high_mean": 0.0006944761939848831, "clip_ratio/low_mean": 0.0004113722343390691, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011058484324166784, "epoch": 14.615743440233237, "grad_norm": 0.13707834482192993, "learning_rate": 5e-07, "loss": -0.0406, "step": 1410 }, { "clip_ratio/high_max": 0.0019334373791934922, "clip_ratio/high_mean": 0.0007220865900308127, "clip_ratio/low_mean": 0.00048322384191124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001205310403747717, "epoch": 14.625072886297376, "grad_norm": 0.1709345281124115, "learning_rate": 5e-07, "loss": -0.0045, "step": 1411 }, { "clip_ratio/high_max": 0.0017885494962683879, "clip_ratio/high_mean": 0.0007162028305174317, "clip_ratio/low_mean": 0.0004852104375459021, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012014132444164716, "epoch": 14.634402332361516, "grad_norm": 0.15374408662319183, "learning_rate": 5e-07, "loss": -0.0156, "step": 1412 }, { "clip_ratio/high_max": 0.0015632610593456775, "clip_ratio/high_mean": 0.0005954244243184803, "clip_ratio/low_mean": 0.0005167118342797039, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011121362549602054, "epoch": 14.643731778425655, "grad_norm": 0.15600575506687164, "learning_rate": 5e-07, "loss": -0.0182, "step": 1413 }, { "clip_ratio/high_max": 0.0017665198138274718, "clip_ratio/high_mean": 0.0006061392305127811, "clip_ratio/low_mean": 0.0004438308042153949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010499700401851442, "epoch": 14.653061224489797, "grad_norm": 0.15446312725543976, "learning_rate": 5e-07, "loss": -0.0109, "step": 1414 }, { "clip_ratio/high_max": 0.0015713251705165021, "clip_ratio/high_mean": 0.0006706859367113793, "clip_ratio/low_mean": 0.0004592031282300013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001129889042204013, "epoch": 14.662390670553936, "grad_norm": 0.1624457687139511, "learning_rate": 5e-07, "loss": -0.0001, "step": 1415 }, { "clip_ratio/high_max": 0.001655785792536335, "clip_ratio/high_mean": 0.0006395423406502232, "clip_ratio/low_mean": 0.00047354347407235764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00111308584746439, "epoch": 14.671720116618076, "grad_norm": 0.14467543363571167, "learning_rate": 5e-07, "loss": -0.0226, "step": 1416 }, { "clip_ratio/high_max": 0.0019041521154576913, "clip_ratio/high_mean": 0.0006819912587161525, "clip_ratio/low_mean": 0.0005242142560746288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001206205524795223, "epoch": 14.681049562682215, "grad_norm": 0.1587066650390625, "learning_rate": 5e-07, "loss": 0.034, "step": 1417 }, { "clip_ratio/high_max": 0.0017631333939789329, "clip_ratio/high_mean": 0.0007374755768978503, "clip_ratio/low_mean": 0.0004812030201719608, "clip_ratio/low_min": 1.0009609468397684e-05, "clip_ratio/region_mean": 0.0012186786116217263, "epoch": 14.690379008746355, "grad_norm": 0.1373339146375656, "learning_rate": 5e-07, "loss": -0.0806, "step": 1418 }, { "clip_ratio/high_max": 0.001686806837824406, "clip_ratio/high_mean": 0.0006144712006062036, "clip_ratio/low_mean": 0.00044520559094962664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010596768224786501, "epoch": 14.699708454810496, "grad_norm": 6.117257118225098, "learning_rate": 5e-07, "loss": -0.0373, "step": 1419 }, { "clip_ratio/high_max": 0.0019000153770321049, "clip_ratio/high_mean": 0.0008597463292971952, "clip_ratio/low_mean": 0.0005312622306519188, "clip_ratio/low_min": 2.6013635761046316e-05, "clip_ratio/region_mean": 0.0013910085290262941, "epoch": 14.709037900874636, "grad_norm": 0.1844167411327362, "learning_rate": 5e-07, "loss": -0.0723, "step": 1420 }, { "clip_ratio/high_max": 0.0018908808451669756, "clip_ratio/high_mean": 0.0007792000324116088, "clip_ratio/low_mean": 0.0005274930276755185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013066930732748006, "epoch": 14.718367346938775, "grad_norm": 0.17280332744121552, "learning_rate": 5e-07, "loss": -0.0565, "step": 1421 }, { "clip_ratio/high_max": 0.0019468955106276553, "clip_ratio/high_mean": 0.0006544854950334411, "clip_ratio/low_mean": 0.0005989508503034813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012534363668237347, "epoch": 14.727696793002915, "grad_norm": 0.13851264119148254, "learning_rate": 5e-07, "loss": -0.0383, "step": 1422 }, { "clip_ratio/high_max": 0.0018685278337216005, "clip_ratio/high_mean": 0.0008310026296385331, "clip_ratio/low_mean": 0.0005463931875056005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001377395834424533, "epoch": 14.737026239067056, "grad_norm": 0.13662144541740417, "learning_rate": 5e-07, "loss": -0.0564, "step": 1423 }, { "clip_ratio/high_max": 0.0017645692933001556, "clip_ratio/high_mean": 0.0007023406451480696, "clip_ratio/low_mean": 0.0006236983299459098, "clip_ratio/low_min": 1.1848341273434926e-05, "clip_ratio/region_mean": 0.0013260390041978098, "epoch": 14.746355685131196, "grad_norm": 0.14711728692054749, "learning_rate": 5e-07, "loss": -0.0094, "step": 1424 }, { "clip_ratio/high_max": 0.0018198601974290796, "clip_ratio/high_mean": 0.0006015751309860207, "clip_ratio/low_mean": 0.0006098216781538213, "clip_ratio/low_min": 2.1389460016507655e-05, "clip_ratio/region_mean": 0.0012113968223275151, "epoch": 14.755685131195335, "grad_norm": 0.1591993123292923, "learning_rate": 5e-07, "loss": 0.0066, "step": 1425 }, { "clip_ratio/high_max": 0.002349659596802667, "clip_ratio/high_mean": 0.0008927821945690084, "clip_ratio/low_mean": 0.0006925969737494597, "clip_ratio/low_min": 3.1370256692753173e-05, "clip_ratio/region_mean": 0.0015853791192057543, "epoch": 14.765014577259475, "grad_norm": 0.16494101285934448, "learning_rate": 5e-07, "loss": -0.0333, "step": 1426 }, { "clip_ratio/high_max": 0.0017567032118677162, "clip_ratio/high_mean": 0.0007262928675118019, "clip_ratio/low_mean": 0.0005314975178407622, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012577903544297442, "epoch": 14.774344023323614, "grad_norm": 0.15009595453739166, "learning_rate": 5e-07, "loss": -0.0329, "step": 1427 }, { "clip_ratio/high_max": 0.0018006103964580689, "clip_ratio/high_mean": 0.0006360772240441293, "clip_ratio/low_mean": 0.0007216846752271522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013577618992712814, "epoch": 14.783673469387756, "grad_norm": 0.19137035310268402, "learning_rate": 5e-07, "loss": -0.0, "step": 1428 }, { "clip_ratio/high_max": 0.0017619526624912396, "clip_ratio/high_mean": 0.0007100796347003779, "clip_ratio/low_mean": 0.0007473806053894805, "clip_ratio/low_min": 1.829089887905866e-05, "clip_ratio/region_mean": 0.001457460235542385, "epoch": 14.793002915451895, "grad_norm": 0.13169540464878082, "learning_rate": 5e-07, "loss": -0.0231, "step": 1429 }, { "clip_ratio/high_max": 0.0020507002700469457, "clip_ratio/high_mean": 0.0007915288897493156, "clip_ratio/low_mean": 0.0006843389846835635, "clip_ratio/low_min": 5.991337093291804e-05, "clip_ratio/region_mean": 0.0014758679026272148, "epoch": 14.802332361516035, "grad_norm": 0.14691326022148132, "learning_rate": 5e-07, "loss": -0.0317, "step": 1430 }, { "clip_ratio/high_max": 0.001864183559519006, "clip_ratio/high_mean": 0.000722110544302268, "clip_ratio/low_mean": 0.0006262959350351593, "clip_ratio/low_min": 1.6648908058414236e-05, "clip_ratio/region_mean": 0.0013484064911608584, "epoch": 14.811661807580174, "grad_norm": 0.2076224833726883, "learning_rate": 5e-07, "loss": -0.012, "step": 1431 }, { "clip_ratio/high_max": 0.0018426175483909901, "clip_ratio/high_mean": 0.0007544089839939261, "clip_ratio/low_mean": 0.0006194425877765752, "clip_ratio/low_min": 2.0836805560975336e-05, "clip_ratio/region_mean": 0.0013738516026933212, "epoch": 14.820991253644316, "grad_norm": 0.12682709097862244, "learning_rate": 5e-07, "loss": -0.0376, "step": 1432 }, { "clip_ratio/high_max": 0.002026880560151767, "clip_ratio/high_mean": 0.000836177609016886, "clip_ratio/low_mean": 0.0006123280290921684, "clip_ratio/low_min": 5.259236058918759e-05, "clip_ratio/region_mean": 0.0014485055799013935, "epoch": 14.830320699708455, "grad_norm": 0.1357240229845047, "learning_rate": 5e-07, "loss": -0.0493, "step": 1433 }, { "clip_ratio/high_max": 0.0015898891870165244, "clip_ratio/high_mean": 0.0005927198610606865, "clip_ratio/low_mean": 0.0006646291458309861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001257349009392783, "epoch": 14.839650145772595, "grad_norm": 0.15676145255565643, "learning_rate": 5e-07, "loss": -0.0192, "step": 1434 }, { "clip_ratio/high_max": 0.0017011879644996952, "clip_ratio/high_mean": 0.0006422964361263439, "clip_ratio/low_mean": 0.0007108896224963246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013531860458897427, "epoch": 14.848979591836734, "grad_norm": 0.15646827220916748, "learning_rate": 5e-07, "loss": 0.015, "step": 1435 }, { "clip_ratio/high_max": 0.0017633474089961965, "clip_ratio/high_mean": 0.0007145444233174203, "clip_ratio/low_mean": 0.0006272408008953789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013417852569546085, "epoch": 14.858309037900874, "grad_norm": 0.1386570781469345, "learning_rate": 5e-07, "loss": 0.02, "step": 1436 }, { "clip_ratio/high_max": 0.0019783047755481675, "clip_ratio/high_mean": 0.0006980582675168989, "clip_ratio/low_mean": 0.00067468554880179, "clip_ratio/low_min": 2.4224806111305952e-05, "clip_ratio/region_mean": 0.0013727438017667737, "epoch": 14.867638483965015, "grad_norm": 0.17861558496952057, "learning_rate": 5e-07, "loss": 0.005, "step": 1437 }, { "clip_ratio/high_max": 0.0017667601696302881, "clip_ratio/high_mean": 0.0006884819231345318, "clip_ratio/low_mean": 0.0007140727648220491, "clip_ratio/low_min": 4.297936629882315e-05, "clip_ratio/region_mean": 0.0014025546770426445, "epoch": 14.876967930029155, "grad_norm": 0.14605605602264404, "learning_rate": 5e-07, "loss": 0.0243, "step": 1438 }, { "clip_ratio/high_max": 0.001512851635197876, "clip_ratio/high_mean": 0.0006017428431732696, "clip_ratio/low_mean": 0.0006487096534328884, "clip_ratio/low_min": 2.5548867597535718e-05, "clip_ratio/region_mean": 0.0012504525111580733, "epoch": 14.886297376093294, "grad_norm": 0.13912726938724518, "learning_rate": 5e-07, "loss": 0.0197, "step": 1439 }, { "clip_ratio/high_max": 0.0019153435896441806, "clip_ratio/high_mean": 0.0007604498250657343, "clip_ratio/low_mean": 0.0007132673836167669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014737172168679535, "epoch": 14.895626822157434, "grad_norm": 0.1615169644355774, "learning_rate": 5e-07, "loss": -0.019, "step": 1440 }, { "epoch": 14.895626822157434, "step": 1440, "total_flos": 0.0, "train_loss": -0.0037147246307250095, "train_runtime": 83704.5789, "train_samples_per_second": 17.127, "train_steps_per_second": 0.019 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 841580033, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }