{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.89588801399825, "eval_steps": 500, "global_step": 1536, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015764508928571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 603.188720703125, "completions/mean_terminated_length": 547.2442626953125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.004666083406240887, "grad_norm": 0.16825807094573975, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 9160265.0, "reward": 0.4989537000656128, "reward_std": 0.2556326985359192, "rewards/simpleverify_reward/mean": 0.4989536702632904, "rewards/simpleverify_reward/std": 0.5000162720680237, "step": 1 }, { "clip_ratio/high_max": 0.0022431204488384537, "clip_ratio/high_mean": 0.0009628464467823505, "clip_ratio/low_mean": 0.0006316744875221048, "clip_ratio/low_min": 1.3116474292473868e-05, "clip_ratio/region_mean": 0.0015945209524943493, "epoch": 0.009332166812481774, "grad_norm": 0.15917454659938812, "learning_rate": 1e-06, "loss": 0.0233, "step": 2 }, { "clip_ratio/high_max": 0.0024326741622644477, "clip_ratio/high_mean": 0.0011160007470607525, "clip_ratio/low_mean": 0.0007444577131536789, "clip_ratio/low_min": 5.975654494250193e-05, "clip_ratio/region_mean": 0.0018604584111017175, "epoch": 0.01399825021872266, "grad_norm": 0.12686820328235626, "learning_rate": 1e-06, "loss": -0.0208, "step": 3 }, { "clip_ratio/high_max": 0.0024386195655097254, "clip_ratio/high_mean": 0.0011106257916253526, "clip_ratio/low_mean": 0.0009062907756742788, "clip_ratio/low_min": 8.275225536635844e-05, "clip_ratio/region_mean": 0.002016916587308515, "epoch": 0.018664333624963548, "grad_norm": 0.1381240040063858, "learning_rate": 1e-06, "loss": 0.06, "step": 4 }, { "clip_ratio/high_max": 0.002695557937840931, "clip_ratio/high_mean": 0.0011688486374623608, "clip_ratio/low_mean": 0.000957150463364087, "clip_ratio/low_min": 9.766997663973598e-05, "clip_ratio/region_mean": 0.0021259991044644266, "epoch": 0.023330417031204434, "grad_norm": 0.13304825127124786, "learning_rate": 1e-06, "loss": -0.0109, "step": 5 }, { "clip_ratio/high_max": 0.0027244676457485184, "clip_ratio/high_mean": 0.001221334961883258, "clip_ratio/low_mean": 0.0012220956159580965, "clip_ratio/low_min": 0.0001768647343851626, "clip_ratio/region_mean": 0.0024434305960312486, "epoch": 0.02799650043744532, "grad_norm": 0.13083291053771973, "learning_rate": 1e-06, "loss": -0.0095, "step": 6 }, { "clip_ratio/high_max": 0.0027735916155506857, "clip_ratio/high_mean": 0.0013042515602137428, "clip_ratio/low_mean": 0.0013594048214145005, "clip_ratio/low_min": 0.00015477509259653743, "clip_ratio/region_mean": 0.0026636563998181373, "epoch": 0.032662583843686206, "grad_norm": 0.12845644354820251, "learning_rate": 1e-06, "loss": -0.0029, "step": 7 }, { "clip_ratio/high_max": 0.003233741706935689, "clip_ratio/high_mean": 0.0015223221234919038, "clip_ratio/low_mean": 0.001188874084618874, "clip_ratio/low_min": 0.00014168537745717913, "clip_ratio/region_mean": 0.002711196182644926, "epoch": 0.037328667249927096, "grad_norm": 0.13185247778892517, "learning_rate": 1e-06, "loss": -0.0489, "step": 8 }, { "clip_ratio/high_max": 0.003264270315412432, "clip_ratio/high_mean": 0.0015198703003989067, "clip_ratio/low_mean": 0.0012674927584157558, "clip_ratio/low_min": 0.00025792716405703686, "clip_ratio/region_mean": 0.002787363038805779, "epoch": 0.04199475065616798, "grad_norm": 0.1370556801557541, "learning_rate": 1e-06, "loss": -0.0241, "step": 9 }, { "clip_ratio/high_max": 0.003038945986190811, "clip_ratio/high_mean": 0.0013063992992101703, "clip_ratio/low_mean": 0.0012288918114791159, "clip_ratio/low_min": 0.00022937654102861416, "clip_ratio/region_mean": 0.0025352911397931166, "epoch": 0.04666083406240887, "grad_norm": 0.13925476372241974, "learning_rate": 1e-06, "loss": 0.0092, "step": 10 }, { "clip_ratio/high_max": 0.002494553707947489, "clip_ratio/high_mean": 0.0012013574669254012, "clip_ratio/low_mean": 0.0013136883826518897, "clip_ratio/low_min": 0.0003180843195877969, "clip_ratio/region_mean": 0.0025150458968710154, "epoch": 0.05132691746864975, "grad_norm": 0.13018232583999634, "learning_rate": 1e-06, "loss": 0.0019, "step": 11 }, { "clip_ratio/high_max": 0.002584379901236389, "clip_ratio/high_mean": 0.0012154537653259467, "clip_ratio/low_mean": 0.0011643258076219354, "clip_ratio/low_min": 0.00020645100994443055, "clip_ratio/region_mean": 0.0023797796384315006, "epoch": 0.05599300087489064, "grad_norm": 0.12660029530525208, "learning_rate": 1e-06, "loss": 0.0087, "step": 12 }, { "clip_ratio/high_max": 0.002588285831734538, "clip_ratio/high_mean": 0.0012208971274958458, "clip_ratio/low_mean": 0.0010608917182253208, "clip_ratio/low_min": 7.698153967794497e-05, "clip_ratio/region_mean": 0.002281788816617336, "epoch": 0.06065908428113152, "grad_norm": 0.12888728082180023, "learning_rate": 1e-06, "loss": 0.0125, "step": 13 }, { "clip_ratio/high_max": 0.002168842482205946, "clip_ratio/high_mean": 0.0010724798994488083, "clip_ratio/low_mean": 0.0012188458531454671, "clip_ratio/low_min": 0.0003006507567988592, "clip_ratio/region_mean": 0.0022913257125765085, "epoch": 0.06532516768737241, "grad_norm": 0.12231365591287613, "learning_rate": 1e-06, "loss": 0.0022, "step": 14 }, { "clip_ratio/high_max": 0.002738081937422976, "clip_ratio/high_mean": 0.0012902747293992434, "clip_ratio/low_mean": 0.0010534348512010183, "clip_ratio/low_min": 0.00014821432250755606, "clip_ratio/region_mean": 0.0023437095442204736, "epoch": 0.0699912510936133, "grad_norm": 0.14451473951339722, "learning_rate": 1e-06, "loss": -0.0254, "step": 15 }, { "clip_ratio/high_max": 0.002559307074989192, "clip_ratio/high_mean": 0.0010771507968456717, "clip_ratio/low_mean": 0.0010660523912520148, "clip_ratio/low_min": 0.00013991356991027715, "clip_ratio/region_mean": 0.002143203171726782, "epoch": 0.07465733449985419, "grad_norm": 0.1317187398672104, "learning_rate": 1e-06, "loss": 0.0326, "step": 16 }, { "clip_ratio/high_max": 0.0035354366118554026, "clip_ratio/high_mean": 0.001526299245597329, "clip_ratio/low_mean": 0.001160885349236196, "clip_ratio/low_min": 9.711422944747028e-05, "clip_ratio/region_mean": 0.002687184649403207, "epoch": 0.07932341790609507, "grad_norm": 0.12933936715126038, "learning_rate": 1e-06, "loss": -0.0408, "step": 17 }, { "clip_ratio/high_max": 0.002537498548917938, "clip_ratio/high_mean": 0.0011709105401678244, "clip_ratio/low_mean": 0.0012086608294339385, "clip_ratio/low_min": 0.00017532482615933986, "clip_ratio/region_mean": 0.0023795713495928794, "epoch": 0.08398950131233596, "grad_norm": 0.12360536307096481, "learning_rate": 1e-06, "loss": 0.0229, "step": 18 }, { "clip_ratio/high_max": 0.0028355855683912523, "clip_ratio/high_mean": 0.0013620893587358296, "clip_ratio/low_mean": 0.0010958041602862068, "clip_ratio/low_min": 0.00014026590724824928, "clip_ratio/region_mean": 0.002457893548125867, "epoch": 0.08865558471857685, "grad_norm": 0.11669901013374329, "learning_rate": 1e-06, "loss": -0.0212, "step": 19 }, { "clip_ratio/high_max": 0.0028455474093789235, "clip_ratio/high_mean": 0.0012413982149155345, "clip_ratio/low_mean": 0.0013979854811623227, "clip_ratio/low_min": 0.00011787635321525158, "clip_ratio/region_mean": 0.0026393836669740267, "epoch": 0.09332166812481774, "grad_norm": 0.12858091294765472, "learning_rate": 1e-06, "loss": 0.0595, "step": 20 }, { "clip_ratio/high_max": 0.003188476213836111, "clip_ratio/high_mean": 0.0014160720565996598, "clip_ratio/low_mean": 0.0012823242032027338, "clip_ratio/low_min": 0.00020925595254084328, "clip_ratio/region_mean": 0.002698396216146648, "epoch": 0.09798775153105861, "grad_norm": 0.12983207404613495, "learning_rate": 1e-06, "loss": -0.0113, "step": 21 }, { "clip_ratio/high_max": 0.0028564574677147903, "clip_ratio/high_mean": 0.0013004683787585236, "clip_ratio/low_mean": 0.0011537495229276828, "clip_ratio/low_min": 0.0001961843572644284, "clip_ratio/region_mean": 0.0024542179380659945, "epoch": 0.1026538349372995, "grad_norm": 0.12031391263008118, "learning_rate": 1e-06, "loss": -0.0098, "step": 22 }, { "clip_ratio/high_max": 0.002843314148776699, "clip_ratio/high_mean": 0.00134471870114794, "clip_ratio/low_mean": 0.0012956337413925212, "clip_ratio/low_min": 5.7733907851797994e-05, "clip_ratio/region_mean": 0.0026403524607303552, "epoch": 0.10731991834354039, "grad_norm": 0.1164054349064827, "learning_rate": 1e-06, "loss": -0.0032, "step": 23 }, { "clip_ratio/high_max": 0.0032875677716219798, "clip_ratio/high_mean": 0.0014806390063313302, "clip_ratio/low_mean": 0.0011173126149515156, "clip_ratio/low_min": 7.565274154330837e-05, "clip_ratio/region_mean": 0.0025979516794905066, "epoch": 0.11198600174978128, "grad_norm": 0.13383948802947998, "learning_rate": 1e-06, "loss": -0.0492, "step": 24 }, { "clip_ratio/high_max": 0.0032452556770294905, "clip_ratio/high_mean": 0.0015567433474643622, "clip_ratio/low_mean": 0.0013572097213909728, "clip_ratio/low_min": 0.0002881341588363284, "clip_ratio/region_mean": 0.0029139530088286847, "epoch": 0.11665208515602217, "grad_norm": 0.12567749619483948, "learning_rate": 1e-06, "loss": -0.0245, "step": 25 }, { "clip_ratio/high_max": 0.002892524833441712, "clip_ratio/high_mean": 0.00136099345763796, "clip_ratio/low_mean": 0.0014545994272339158, "clip_ratio/low_min": 0.00016666963801981183, "clip_ratio/region_mean": 0.0028155929103377275, "epoch": 0.12131816856226305, "grad_norm": 0.12536926567554474, "learning_rate": 1e-06, "loss": 0.0088, "step": 26 }, { "clip_ratio/high_max": 0.00290807215787936, "clip_ratio/high_mean": 0.0013549137729569338, "clip_ratio/low_mean": 0.001522806011053035, "clip_ratio/low_min": 0.00029649123280250933, "clip_ratio/region_mean": 0.002877719874959439, "epoch": 0.12598425196850394, "grad_norm": 0.12423344701528549, "learning_rate": 1e-06, "loss": 0.0015, "step": 27 }, { "clip_ratio/high_max": 0.002823303402692545, "clip_ratio/high_mean": 0.0013742239534622058, "clip_ratio/low_mean": 0.0015396033850265667, "clip_ratio/low_min": 0.00020346302972029662, "clip_ratio/region_mean": 0.002913827273005154, "epoch": 0.13065033537474482, "grad_norm": 0.12013071775436401, "learning_rate": 1e-06, "loss": 0.0084, "step": 28 }, { "clip_ratio/high_max": 0.0029015862819505855, "clip_ratio/high_mean": 0.0014054770181246568, "clip_ratio/low_mean": 0.0014502845551760402, "clip_ratio/low_min": 0.00013019296420679893, "clip_ratio/region_mean": 0.0028557615587487817, "epoch": 0.13531641878098571, "grad_norm": 0.12109560519456863, "learning_rate": 1e-06, "loss": 0.0122, "step": 29 }, { "clip_ratio/high_max": 0.0030319598299684003, "clip_ratio/high_mean": 0.0013882502607884817, "clip_ratio/low_mean": 0.0016965941358648706, "clip_ratio/low_min": 0.000465552186142304, "clip_ratio/region_mean": 0.0030848444293951616, "epoch": 0.1399825021872266, "grad_norm": 0.1187513992190361, "learning_rate": 1e-06, "loss": 0.0018, "step": 30 }, { "clip_ratio/high_max": 0.003424561276915483, "clip_ratio/high_mean": 0.001583503617439419, "clip_ratio/low_mean": 0.0015595244040014222, "clip_ratio/low_min": 0.00025373905373271555, "clip_ratio/region_mean": 0.0031430279414053075, "epoch": 0.1446485855934675, "grad_norm": 0.14122727513313293, "learning_rate": 1e-06, "loss": -0.0257, "step": 31 }, { "clip_ratio/high_max": 0.0030605307983933017, "clip_ratio/high_mean": 0.001422762838046765, "clip_ratio/low_mean": 0.001624619500944391, "clip_ratio/low_min": 0.00013118834613123909, "clip_ratio/region_mean": 0.0030473822698695585, "epoch": 0.14931466899970838, "grad_norm": 0.12433193624019623, "learning_rate": 1e-06, "loss": 0.0322, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013881138392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 611.9447021484375, "completions/mean_terminated_length": 562.9012451171875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.15398075240594924, "grad_norm": 0.13136796653270721, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 18527584.0, "reward": 0.5294364094734192, "reward_std": 0.2430260330438614, "rewards/simpleverify_reward/mean": 0.5294364094734192, "rewards/simpleverify_reward/std": 0.4991501271724701, "step": 33 }, { "clip_ratio/high_max": 0.0023058708175085485, "clip_ratio/high_mean": 0.0010608333941490855, "clip_ratio/low_mean": 0.0006683274905299186, "clip_ratio/low_min": 2.6147899006900843e-05, "clip_ratio/region_mean": 0.0017291609110543504, "epoch": 0.15864683581219013, "grad_norm": 0.14291034638881683, "learning_rate": 1e-06, "loss": 0.0104, "step": 34 }, { "clip_ratio/high_max": 0.0024183506538975053, "clip_ratio/high_mean": 0.001051281866239151, "clip_ratio/low_mean": 0.0006239201175048947, "clip_ratio/low_min": 3.9680769987171516e-05, "clip_ratio/region_mean": 0.001675202016485855, "epoch": 0.16331291921843102, "grad_norm": 0.13595029711723328, "learning_rate": 1e-06, "loss": -0.0099, "step": 35 }, { "clip_ratio/high_max": 0.002563152498623822, "clip_ratio/high_mean": 0.0010042754547612276, "clip_ratio/low_mean": 0.000609123410868051, "clip_ratio/low_min": 4.9355025112163275e-05, "clip_ratio/region_mean": 0.0016133989120135084, "epoch": 0.1679790026246719, "grad_norm": 0.1255405843257904, "learning_rate": 1e-06, "loss": -0.0064, "step": 36 }, { "clip_ratio/high_max": 0.002357198747631628, "clip_ratio/high_mean": 0.0010236527887172997, "clip_ratio/low_mean": 0.0007413587427436141, "clip_ratio/low_min": 4.127016381971771e-05, "clip_ratio/region_mean": 0.0017650115405558608, "epoch": 0.1726450860309128, "grad_norm": 0.1268312633037567, "learning_rate": 1e-06, "loss": 0.0199, "step": 37 }, { "clip_ratio/high_max": 0.002156820279196836, "clip_ratio/high_mean": 0.0010005908807215746, "clip_ratio/low_mean": 0.0007532192448707065, "clip_ratio/low_min": 7.858727713028202e-05, "clip_ratio/region_mean": 0.0017538101237732917, "epoch": 0.1773111694371537, "grad_norm": 0.14367766678333282, "learning_rate": 1e-06, "loss": -0.0034, "step": 38 }, { "clip_ratio/high_max": 0.002738971532380674, "clip_ratio/high_mean": 0.0011742145397874992, "clip_ratio/low_mean": 0.000790875581515138, "clip_ratio/low_min": 6.495592515420867e-05, "clip_ratio/region_mean": 0.0019650901085697114, "epoch": 0.18197725284339458, "grad_norm": 0.13926085829734802, "learning_rate": 1e-06, "loss": -0.018, "step": 39 }, { "clip_ratio/high_max": 0.0023046898932079785, "clip_ratio/high_mean": 0.0011357082203176105, "clip_ratio/low_mean": 0.0009107508940360276, "clip_ratio/low_min": 0.00013045053947280394, "clip_ratio/region_mean": 0.002046459099801723, "epoch": 0.18664333624963547, "grad_norm": 0.1315917670726776, "learning_rate": 1e-06, "loss": -0.0079, "step": 40 }, { "clip_ratio/high_max": 0.0023842503360356204, "clip_ratio/high_mean": 0.00112566656753188, "clip_ratio/low_mean": 0.0010515346421016147, "clip_ratio/low_min": 0.00015354276274592848, "clip_ratio/region_mean": 0.002177201175072696, "epoch": 0.19130941965587636, "grad_norm": 0.13507364690303802, "learning_rate": 1e-06, "loss": -0.0215, "step": 41 }, { "clip_ratio/high_max": 0.0026018995922640897, "clip_ratio/high_mean": 0.00112384133899468, "clip_ratio/low_mean": 0.001048213771355222, "clip_ratio/low_min": 0.0001181974903374794, "clip_ratio/region_mean": 0.0021720550503232516, "epoch": 0.19597550306211722, "grad_norm": 0.11782612651586533, "learning_rate": 1e-06, "loss": 0.0084, "step": 42 }, { "clip_ratio/high_max": 0.0026973372441716492, "clip_ratio/high_mean": 0.001119166085118195, "clip_ratio/low_mean": 0.0011986727222392801, "clip_ratio/low_min": 0.00011101602831331547, "clip_ratio/region_mean": 0.0023178388437372632, "epoch": 0.2006415864683581, "grad_norm": 0.1235799640417099, "learning_rate": 1e-06, "loss": 0.0455, "step": 43 }, { "clip_ratio/high_max": 0.002128927102603484, "clip_ratio/high_mean": 0.0009653180422901642, "clip_ratio/low_mean": 0.0012455816759029403, "clip_ratio/low_min": 0.0001529217747702205, "clip_ratio/region_mean": 0.0022108996636234224, "epoch": 0.205307669874599, "grad_norm": 0.13647939264774323, "learning_rate": 1e-06, "loss": 0.056, "step": 44 }, { "clip_ratio/high_max": 0.0025862012844299898, "clip_ratio/high_mean": 0.001224781652126694, "clip_ratio/low_mean": 0.0011133782591059571, "clip_ratio/low_min": 0.00016104968563013244, "clip_ratio/region_mean": 0.002338159865757916, "epoch": 0.2099737532808399, "grad_norm": 0.12769809365272522, "learning_rate": 1e-06, "loss": 0.0086, "step": 45 }, { "clip_ratio/high_max": 0.0024678852132637985, "clip_ratio/high_mean": 0.0012310537495068274, "clip_ratio/low_mean": 0.0011564890082809143, "clip_ratio/low_min": 0.00017939623103302438, "clip_ratio/region_mean": 0.0023875428523751907, "epoch": 0.21463983668708078, "grad_norm": 0.12696535885334015, "learning_rate": 1e-06, "loss": -0.0188, "step": 46 }, { "clip_ratio/high_max": 0.002291153563419357, "clip_ratio/high_mean": 0.0011049769491364714, "clip_ratio/low_mean": 0.001051378101692535, "clip_ratio/low_min": 9.607033734937431e-05, "clip_ratio/region_mean": 0.002156354967155494, "epoch": 0.21930592009332167, "grad_norm": 0.12068193405866623, "learning_rate": 1e-06, "loss": -0.0154, "step": 47 }, { "clip_ratio/high_max": 0.0023848756100051105, "clip_ratio/high_mean": 0.0010859482645173557, "clip_ratio/low_mean": 0.0011789464060711907, "clip_ratio/low_min": 0.00021505491713469382, "clip_ratio/region_mean": 0.0022648946614935994, "epoch": 0.22397200349956256, "grad_norm": 0.11583662778139114, "learning_rate": 1e-06, "loss": 0.0199, "step": 48 }, { "clip_ratio/high_max": 0.003066568140638992, "clip_ratio/high_mean": 0.0013708297665289138, "clip_ratio/low_mean": 0.0013199262248235755, "clip_ratio/low_min": 0.00017432043205189984, "clip_ratio/region_mean": 0.0026907560022664256, "epoch": 0.22863808690580345, "grad_norm": 0.1144961416721344, "learning_rate": 1e-06, "loss": -0.0099, "step": 49 }, { "clip_ratio/high_max": 0.0027678100450430065, "clip_ratio/high_mean": 0.001266906912860577, "clip_ratio/low_mean": 0.0013194326420489233, "clip_ratio/low_min": 0.00020752859018102754, "clip_ratio/region_mean": 0.002586339571280405, "epoch": 0.23330417031204434, "grad_norm": 0.123209647834301, "learning_rate": 1e-06, "loss": 0.01, "step": 50 }, { "clip_ratio/high_max": 0.00342193063988816, "clip_ratio/high_mean": 0.0014160521241137758, "clip_ratio/low_mean": 0.0011307128879707307, "clip_ratio/low_min": 0.0001739427934808191, "clip_ratio/region_mean": 0.002546764982980676, "epoch": 0.2379702537182852, "grad_norm": 0.11714783310890198, "learning_rate": 1e-06, "loss": -0.0102, "step": 51 }, { "clip_ratio/high_max": 0.0029763600759906694, "clip_ratio/high_mean": 0.001263724894670304, "clip_ratio/low_mean": 0.0011015970521839336, "clip_ratio/low_min": 0.00015545579844911117, "clip_ratio/region_mean": 0.0023653219468542375, "epoch": 0.2426363371245261, "grad_norm": 0.12042421102523804, "learning_rate": 1e-06, "loss": -0.0067, "step": 52 }, { "clip_ratio/high_max": 0.0031162177911028266, "clip_ratio/high_mean": 0.001347083663858939, "clip_ratio/low_mean": 0.0012247143713466357, "clip_ratio/low_min": 0.00013263207893032813, "clip_ratio/region_mean": 0.0025717980024637654, "epoch": 0.24730242053076698, "grad_norm": 0.1240837574005127, "learning_rate": 1e-06, "loss": 0.0196, "step": 53 }, { "clip_ratio/high_max": 0.0027978752623312175, "clip_ratio/high_mean": 0.0012512018547568005, "clip_ratio/low_mean": 0.0011979926312051248, "clip_ratio/low_min": 0.00014231304703571368, "clip_ratio/region_mean": 0.0024491945368936285, "epoch": 0.25196850393700787, "grad_norm": 0.10426893085241318, "learning_rate": 1e-06, "loss": -0.0037, "step": 54 }, { "clip_ratio/high_max": 0.003130602119199466, "clip_ratio/high_mean": 0.0013899867408326827, "clip_ratio/low_mean": 0.0011404099532228429, "clip_ratio/low_min": 3.0508501367876306e-05, "clip_ratio/region_mean": 0.0025303966394858435, "epoch": 0.2566345873432488, "grad_norm": 0.12543822824954987, "learning_rate": 1e-06, "loss": -0.0185, "step": 55 }, { "clip_ratio/high_max": 0.0028997409463045187, "clip_ratio/high_mean": 0.0013613371520477813, "clip_ratio/low_mean": 0.0013036046475463081, "clip_ratio/low_min": 0.0003099424266110873, "clip_ratio/region_mean": 0.0026649418141460046, "epoch": 0.26130067074948965, "grad_norm": 0.1263367384672165, "learning_rate": 1e-06, "loss": -0.0083, "step": 56 }, { "clip_ratio/high_max": 0.0027787757862824947, "clip_ratio/high_mean": 0.0012391031195875257, "clip_ratio/low_mean": 0.0012746982702083187, "clip_ratio/low_min": 0.00015071545385580976, "clip_ratio/region_mean": 0.0025138013807008974, "epoch": 0.2659667541557305, "grad_norm": 0.12137658149003983, "learning_rate": 1e-06, "loss": -0.0219, "step": 57 }, { "clip_ratio/high_max": 0.002790854974591639, "clip_ratio/high_mean": 0.001271753182663815, "clip_ratio/low_mean": 0.00129299901163904, "clip_ratio/low_min": 0.0001493587233198923, "clip_ratio/region_mean": 0.0025647522052167915, "epoch": 0.27063283756197143, "grad_norm": 0.10908447206020355, "learning_rate": 1e-06, "loss": 0.0081, "step": 58 }, { "clip_ratio/high_max": 0.0030602671758970246, "clip_ratio/high_mean": 0.0011957619171880651, "clip_ratio/low_mean": 0.0014619633002439514, "clip_ratio/low_min": 0.00017814520015235757, "clip_ratio/region_mean": 0.0026577251992421225, "epoch": 0.2752989209682123, "grad_norm": 0.11459843069314957, "learning_rate": 1e-06, "loss": 0.0452, "step": 59 }, { "clip_ratio/high_max": 0.00275232477724785, "clip_ratio/high_mean": 0.0011349596425134223, "clip_ratio/low_mean": 0.0015506329109484795, "clip_ratio/low_min": 0.00018194458971265703, "clip_ratio/region_mean": 0.002685592553461902, "epoch": 0.2799650043744532, "grad_norm": 0.1261429339647293, "learning_rate": 1e-06, "loss": 0.0556, "step": 60 }, { "clip_ratio/high_max": 0.003160063140967395, "clip_ratio/high_mean": 0.0014107936622167472, "clip_ratio/low_mean": 0.0015674529240641277, "clip_ratio/low_min": 0.0002114010785589926, "clip_ratio/region_mean": 0.0029782466663164087, "epoch": 0.28463108778069407, "grad_norm": 0.11813414096832275, "learning_rate": 1e-06, "loss": 0.0082, "step": 61 }, { "clip_ratio/high_max": 0.0030250746640376747, "clip_ratio/high_mean": 0.0014572870677511673, "clip_ratio/low_mean": 0.0014617256565543357, "clip_ratio/low_min": 0.00023663033152843127, "clip_ratio/region_mean": 0.002919012746133376, "epoch": 0.289297171186935, "grad_norm": 0.1186414286494255, "learning_rate": 1e-06, "loss": -0.0192, "step": 62 }, { "clip_ratio/high_max": 0.003208254143828526, "clip_ratio/high_mean": 0.0014186432017595507, "clip_ratio/low_mean": 0.0014737911806150805, "clip_ratio/low_min": 0.00011337467549310531, "clip_ratio/region_mean": 0.002892434407840483, "epoch": 0.29396325459317585, "grad_norm": 0.11375010013580322, "learning_rate": 1e-06, "loss": -0.0158, "step": 63 }, { "clip_ratio/high_max": 0.002864666279492667, "clip_ratio/high_mean": 0.0012809197560272878, "clip_ratio/low_mean": 0.0015702278979006223, "clip_ratio/low_min": 0.00024213889810198452, "clip_ratio/region_mean": 0.002851147648470942, "epoch": 0.29862933799941677, "grad_norm": 0.11020326614379883, "learning_rate": 1e-06, "loss": 0.0196, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 609.0919189453125, "completions/mean_terminated_length": 557.7557373046875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30329542140565763, "grad_norm": 0.1333010196685791, "learning_rate": 1e-06, "loss": 0.0193, "num_tokens": 27844941.0, "reward": 0.5291574001312256, "reward_std": 0.23031266033649445, "rewards/simpleverify_reward/mean": 0.5291573405265808, "rewards/simpleverify_reward/std": 0.4991665184497833, "step": 65 }, { "clip_ratio/high_max": 0.002095986492349766, "clip_ratio/high_mean": 0.0009549665555823594, "clip_ratio/low_mean": 0.0006022504849170218, "clip_ratio/low_min": 6.761079293937655e-05, "clip_ratio/region_mean": 0.0015572170159430243, "epoch": 0.3079615048118985, "grad_norm": 0.11942674964666367, "learning_rate": 1e-06, "loss": -0.0201, "step": 66 }, { "clip_ratio/high_max": 0.002184139368182514, "clip_ratio/high_mean": 0.0009601217097952031, "clip_ratio/low_mean": 0.0006109681016823743, "clip_ratio/low_min": 4.145236925978679e-05, "clip_ratio/region_mean": 0.0015710898514953442, "epoch": 0.3126275882181394, "grad_norm": 0.1350686401128769, "learning_rate": 1e-06, "loss": 0.0108, "step": 67 }, { "clip_ratio/high_max": 0.0021282443776726723, "clip_ratio/high_mean": 0.0010061018001579214, "clip_ratio/low_mean": 0.0006382428437063936, "clip_ratio/low_min": 1.2626263014681172e-05, "clip_ratio/region_mean": 0.0016443446293123998, "epoch": 0.31729367162438027, "grad_norm": 0.12547576427459717, "learning_rate": 1e-06, "loss": -0.0125, "step": 68 }, { "clip_ratio/high_max": 0.0022075731540098786, "clip_ratio/high_mean": 0.0009602974023437127, "clip_ratio/low_mean": 0.0007607722145621665, "clip_ratio/low_min": 8.331664412253303e-05, "clip_ratio/region_mean": 0.0017210696532856673, "epoch": 0.3219597550306212, "grad_norm": 0.13183489441871643, "learning_rate": 1e-06, "loss": 0.0434, "step": 69 }, { "clip_ratio/high_max": 0.002173729895730503, "clip_ratio/high_mean": 0.0009830275594140403, "clip_ratio/low_mean": 0.0008126407901727362, "clip_ratio/low_min": 9.501906424702611e-05, "clip_ratio/region_mean": 0.0017956683441298082, "epoch": 0.32662583843686205, "grad_norm": 0.1218208521604538, "learning_rate": 1e-06, "loss": -0.0121, "step": 70 }, { "clip_ratio/high_max": 0.002139653734047897, "clip_ratio/high_mean": 0.0009312951751780929, "clip_ratio/low_mean": 0.0009362947894260287, "clip_ratio/low_min": 0.00014902820930728922, "clip_ratio/region_mean": 0.0018675899700610898, "epoch": 0.33129192184310297, "grad_norm": 0.11886744946241379, "learning_rate": 1e-06, "loss": 0.0016, "step": 71 }, { "clip_ratio/high_max": 0.002137196312105516, "clip_ratio/high_mean": 0.0009359336963825626, "clip_ratio/low_mean": 0.0010420407306810375, "clip_ratio/low_min": 7.669547994737513e-05, "clip_ratio/region_mean": 0.0019779744325205684, "epoch": 0.3359580052493438, "grad_norm": 0.11734430491924286, "learning_rate": 1e-06, "loss": 0.0147, "step": 72 }, { "clip_ratio/high_max": 0.0020253926631994545, "clip_ratio/high_mean": 0.0009318325828644447, "clip_ratio/low_mean": 0.0011065478720411193, "clip_ratio/low_min": 9.858188423095271e-05, "clip_ratio/region_mean": 0.0020383804221637547, "epoch": 0.34062408865558474, "grad_norm": 0.12952370941638947, "learning_rate": 1e-06, "loss": 0.0019, "step": 73 }, { "clip_ratio/high_max": 0.0030815795544185676, "clip_ratio/high_mean": 0.0012394871810101904, "clip_ratio/low_mean": 0.0010314514183846768, "clip_ratio/low_min": 0.00010130217287951382, "clip_ratio/region_mean": 0.002270938639412634, "epoch": 0.3452901720618256, "grad_norm": 0.11882374435663223, "learning_rate": 1e-06, "loss": 0.0008, "step": 74 }, { "clip_ratio/high_max": 0.002301025109773036, "clip_ratio/high_mean": 0.0010060086569865234, "clip_ratio/low_mean": 0.001249170585651882, "clip_ratio/low_min": 0.00020350982686068164, "clip_ratio/region_mean": 0.002255179249914363, "epoch": 0.34995625546806647, "grad_norm": 0.118986114859581, "learning_rate": 1e-06, "loss": 0.0166, "step": 75 }, { "clip_ratio/high_max": 0.0022405715426430106, "clip_ratio/high_mean": 0.0010710069691413082, "clip_ratio/low_mean": 0.0012152361778134946, "clip_ratio/low_min": 0.00017451080293540144, "clip_ratio/region_mean": 0.0022862431142129935, "epoch": 0.3546223388743074, "grad_norm": 0.1328345239162445, "learning_rate": 1e-06, "loss": -0.0105, "step": 76 }, { "clip_ratio/high_max": 0.0024153872072929516, "clip_ratio/high_mean": 0.001028700189635856, "clip_ratio/low_mean": 0.001207942441396881, "clip_ratio/low_min": 2.5003879272844642e-05, "clip_ratio/region_mean": 0.0022366426419466734, "epoch": 0.35928842228054825, "grad_norm": 0.12570519745349884, "learning_rate": 1e-06, "loss": -0.0072, "step": 77 }, { "clip_ratio/high_max": 0.002154354573576711, "clip_ratio/high_mean": 0.0009097466227103723, "clip_ratio/low_mean": 0.0012079304033250082, "clip_ratio/low_min": 0.00019134853755531367, "clip_ratio/region_mean": 0.0021176769951125607, "epoch": 0.36395450568678916, "grad_norm": 0.12000148743391037, "learning_rate": 1e-06, "loss": 0.0441, "step": 78 }, { "clip_ratio/high_max": 0.002160135132726282, "clip_ratio/high_mean": 0.0009694766740722116, "clip_ratio/low_mean": 0.0011317591342958622, "clip_ratio/low_min": 0.00012337863336142618, "clip_ratio/region_mean": 0.0021012357537983917, "epoch": 0.36862058909303, "grad_norm": 0.13037844002246857, "learning_rate": 1e-06, "loss": 0.0074, "step": 79 }, { "clip_ratio/high_max": 0.0026232917880406603, "clip_ratio/high_mean": 0.0011170122197654564, "clip_ratio/low_mean": 0.0012029362114844844, "clip_ratio/low_min": 0.00013459848742058966, "clip_ratio/region_mean": 0.002319948427611962, "epoch": 0.37328667249927094, "grad_norm": 0.13097058236598969, "learning_rate": 1e-06, "loss": -0.0518, "step": 80 }, { "clip_ratio/high_max": 0.0029030659279669635, "clip_ratio/high_mean": 0.0013486304742400534, "clip_ratio/low_mean": 0.0015102603938430548, "clip_ratio/low_min": 0.00024855359515640885, "clip_ratio/region_mean": 0.0028588907880475745, "epoch": 0.3779527559055118, "grad_norm": 0.12466525286436081, "learning_rate": 1e-06, "loss": 0.0189, "step": 81 }, { "clip_ratio/high_max": 0.0029934592021163553, "clip_ratio/high_mean": 0.0012536262402136344, "clip_ratio/low_mean": 0.0013735974571318366, "clip_ratio/low_min": 0.0002149079400624032, "clip_ratio/region_mean": 0.0026272237082594074, "epoch": 0.3826188393117527, "grad_norm": 0.11244761943817139, "learning_rate": 1e-06, "loss": -0.0205, "step": 82 }, { "clip_ratio/high_max": 0.0031377016493934207, "clip_ratio/high_mean": 0.001302192387811374, "clip_ratio/low_mean": 0.0014633027858508285, "clip_ratio/low_min": 0.00017245216440642253, "clip_ratio/region_mean": 0.0027654952282318845, "epoch": 0.3872849227179936, "grad_norm": 0.12036320567131042, "learning_rate": 1e-06, "loss": 0.0103, "step": 83 }, { "clip_ratio/high_max": 0.0028718647590721957, "clip_ratio/high_mean": 0.0012610847988980822, "clip_ratio/low_mean": 0.0012295683700358495, "clip_ratio/low_min": 4.059804996359162e-05, "clip_ratio/region_mean": 0.0024906531543820165, "epoch": 0.39195100612423445, "grad_norm": 0.1192980483174324, "learning_rate": 1e-06, "loss": -0.0129, "step": 84 }, { "clip_ratio/high_max": 0.002950182279164437, "clip_ratio/high_mean": 0.0012620100169442594, "clip_ratio/low_mean": 0.0014633037244493607, "clip_ratio/low_min": 0.000233894261327805, "clip_ratio/region_mean": 0.0027253137523075566, "epoch": 0.39661708953047536, "grad_norm": 0.12224233150482178, "learning_rate": 1e-06, "loss": 0.0429, "step": 85 }, { "clip_ratio/high_max": 0.002965279338241089, "clip_ratio/high_mean": 0.0012996648183616344, "clip_ratio/low_mean": 0.0012757712611346506, "clip_ratio/low_min": 0.0001809010464057792, "clip_ratio/region_mean": 0.002575436039478518, "epoch": 0.4012831729367162, "grad_norm": 0.12633928656578064, "learning_rate": 1e-06, "loss": -0.0124, "step": 86 }, { "clip_ratio/high_max": 0.0028046031438861974, "clip_ratio/high_mean": 0.001274156995350495, "clip_ratio/low_mean": 0.0014248448969738092, "clip_ratio/low_min": 0.00025866729174595093, "clip_ratio/region_mean": 0.002699001932342071, "epoch": 0.40594925634295714, "grad_norm": 0.11714571714401245, "learning_rate": 1e-06, "loss": 0.0012, "step": 87 }, { "clip_ratio/high_max": 0.0030716592737007886, "clip_ratio/high_mean": 0.001227542825290584, "clip_ratio/low_mean": 0.001435277812561253, "clip_ratio/low_min": 0.0001626812754693674, "clip_ratio/region_mean": 0.0026628206978784874, "epoch": 0.410615339749198, "grad_norm": 0.11476991325616837, "learning_rate": 1e-06, "loss": 0.0143, "step": 88 }, { "clip_ratio/high_max": 0.002687366693862714, "clip_ratio/high_mean": 0.0011354204143572133, "clip_ratio/low_mean": 0.0014332951759570278, "clip_ratio/low_min": 0.00024562145063100616, "clip_ratio/region_mean": 0.0025687156958156265, "epoch": 0.4152814231554389, "grad_norm": 0.12055323272943497, "learning_rate": 1e-06, "loss": 0.0015, "step": 89 }, { "clip_ratio/high_max": 0.0032671166482032277, "clip_ratio/high_mean": 0.0014134002558421344, "clip_ratio/low_mean": 0.0013541813932533842, "clip_ratio/low_min": 0.00013320259586180327, "clip_ratio/region_mean": 0.002767581660009455, "epoch": 0.4199475065616798, "grad_norm": 0.11246298253536224, "learning_rate": 1e-06, "loss": 0.0003, "step": 90 }, { "clip_ratio/high_max": 0.0025632146789575927, "clip_ratio/high_mean": 0.0012043046335747931, "clip_ratio/low_mean": 0.001762463383784052, "clip_ratio/low_min": 0.00019503739258652786, "clip_ratio/region_mean": 0.002966768079204485, "epoch": 0.4246135899679207, "grad_norm": 0.11150757968425751, "learning_rate": 1e-06, "loss": 0.0162, "step": 91 }, { "clip_ratio/high_max": 0.0029760202378383838, "clip_ratio/high_mean": 0.001375091349473223, "clip_ratio/low_mean": 0.0016609986232651863, "clip_ratio/low_min": 0.00021439711963466834, "clip_ratio/region_mean": 0.0030360899545485154, "epoch": 0.42927967337416156, "grad_norm": 0.12024401128292084, "learning_rate": 1e-06, "loss": -0.011, "step": 92 }, { "clip_ratio/high_max": 0.003003647507284768, "clip_ratio/high_mean": 0.0012914321414427832, "clip_ratio/low_mean": 0.0017726602163747884, "clip_ratio/low_min": 0.00015167150559136644, "clip_ratio/region_mean": 0.0030640923359896988, "epoch": 0.4339457567804024, "grad_norm": 0.11853361129760742, "learning_rate": 1e-06, "loss": -0.0078, "step": 93 }, { "clip_ratio/high_max": 0.002971899950352963, "clip_ratio/high_mean": 0.0012267363817954902, "clip_ratio/low_mean": 0.0017872519238153473, "clip_ratio/low_min": 0.00029699699371121824, "clip_ratio/region_mean": 0.003013988316524774, "epoch": 0.43861184018664334, "grad_norm": 0.10903094708919525, "learning_rate": 1e-06, "loss": 0.0437, "step": 94 }, { "clip_ratio/high_max": 0.0027418189420131966, "clip_ratio/high_mean": 0.0012564454918901902, "clip_ratio/low_mean": 0.0016971448421827517, "clip_ratio/low_min": 0.00018493739662517328, "clip_ratio/region_mean": 0.002953590315883048, "epoch": 0.4432779235928842, "grad_norm": 0.12291538715362549, "learning_rate": 1e-06, "loss": 0.0068, "step": 95 }, { "clip_ratio/high_max": 0.003106837932136841, "clip_ratio/high_mean": 0.0014011914317961782, "clip_ratio/low_mean": 0.001698844149359502, "clip_ratio/low_min": 0.0002623428499646252, "clip_ratio/region_mean": 0.003100035566603765, "epoch": 0.4479440069991251, "grad_norm": 0.1240989938378334, "learning_rate": 1e-06, "loss": -0.0523, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0165318080357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 603.2432861328125, "completions/mean_terminated_length": 544.5310668945312, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.452610090405366, "grad_norm": 0.13344593346118927, "learning_rate": 1e-06, "loss": 0.0362, "num_tokens": 36922308.0, "reward": 0.5622209906578064, "reward_std": 0.21516825258731842, "rewards/simpleverify_reward/mean": 0.5622209906578064, "rewards/simpleverify_reward/std": 0.4961307644844055, "step": 97 }, { "clip_ratio/high_max": 0.003061515380977653, "clip_ratio/high_mean": 0.001179942266389844, "clip_ratio/low_mean": 0.000555939896912605, "clip_ratio/low_min": 4.299077409086749e-05, "clip_ratio/region_mean": 0.0017358822005917318, "epoch": 0.4572761738116069, "grad_norm": 0.13381463289260864, "learning_rate": 1e-06, "loss": -0.0098, "step": 98 }, { "clip_ratio/high_max": 0.002349179470911622, "clip_ratio/high_mean": 0.001020134466671152, "clip_ratio/low_mean": 0.0005440891563921468, "clip_ratio/low_min": 1.3507672520063352e-05, "clip_ratio/region_mean": 0.0015642236467101611, "epoch": 0.46194225721784776, "grad_norm": 0.1167745515704155, "learning_rate": 1e-06, "loss": -0.0149, "step": 99 }, { "clip_ratio/high_max": 0.0023128092507249676, "clip_ratio/high_mean": 0.0009480548451392679, "clip_ratio/low_mean": 0.0006903991043145652, "clip_ratio/low_min": 7.126853324734839e-05, "clip_ratio/region_mean": 0.0016384539339924231, "epoch": 0.4666083406240887, "grad_norm": 0.12403348088264465, "learning_rate": 1e-06, "loss": 0.0245, "step": 100 }, { "clip_ratio/high_max": 0.002502296381862834, "clip_ratio/high_mean": 0.0010109271388500929, "clip_ratio/low_mean": 0.0005228388636169257, "clip_ratio/low_min": 1.6200103345909156e-05, "clip_ratio/region_mean": 0.0015337660006480291, "epoch": 0.47127442403032954, "grad_norm": 0.13828738033771515, "learning_rate": 1e-06, "loss": -0.0542, "step": 101 }, { "clip_ratio/high_max": 0.0021421416313387454, "clip_ratio/high_mean": 0.0009780003547348315, "clip_ratio/low_mean": 0.0007443298763973871, "clip_ratio/low_min": 5.147692445461871e-05, "clip_ratio/region_mean": 0.0017223302493221126, "epoch": 0.4759405074365704, "grad_norm": 0.1175597757101059, "learning_rate": 1e-06, "loss": -0.0005, "step": 102 }, { "clip_ratio/high_max": 0.0020065509306732565, "clip_ratio/high_mean": 0.0009202863984683063, "clip_ratio/low_mean": 0.0006362996318785008, "clip_ratio/low_min": 3.835896950477036e-05, "clip_ratio/region_mean": 0.0015565860740025528, "epoch": 0.4806065908428113, "grad_norm": 0.126253142952919, "learning_rate": 1e-06, "loss": -0.0014, "step": 103 }, { "clip_ratio/high_max": 0.0021492191226570867, "clip_ratio/high_mean": 0.000973820177023299, "clip_ratio/low_mean": 0.0007368668557319324, "clip_ratio/low_min": 4.009028634754941e-05, "clip_ratio/region_mean": 0.001710687043669168, "epoch": 0.4852726742490522, "grad_norm": 0.12503156065940857, "learning_rate": 1e-06, "loss": 0.0279, "step": 104 }, { "clip_ratio/high_max": 0.0021064714310341515, "clip_ratio/high_mean": 0.001027931986754993, "clip_ratio/low_mean": 0.0008327936739078723, "clip_ratio/low_min": 5.584114660450723e-05, "clip_ratio/region_mean": 0.001860725664300844, "epoch": 0.4899387576552931, "grad_norm": 0.12067950516939163, "learning_rate": 1e-06, "loss": -0.0443, "step": 105 }, { "clip_ratio/high_max": 0.0021253314189380035, "clip_ratio/high_mean": 0.0008722530110389926, "clip_ratio/low_mean": 0.0008943315260694362, "clip_ratio/low_min": 0.00010063044737762539, "clip_ratio/region_mean": 0.001766584493452683, "epoch": 0.49460484106153396, "grad_norm": 0.1445721536874771, "learning_rate": 1e-06, "loss": 0.0318, "step": 106 }, { "clip_ratio/high_max": 0.002380074074608274, "clip_ratio/high_mean": 0.0010265141900163144, "clip_ratio/low_mean": 0.0009450614634261001, "clip_ratio/low_min": 0.00018350116079091094, "clip_ratio/region_mean": 0.001971575664356351, "epoch": 0.4992709244677749, "grad_norm": 0.126907080411911, "learning_rate": 1e-06, "loss": 0.0329, "step": 107 }, { "clip_ratio/high_max": 0.0025467996965744533, "clip_ratio/high_mean": 0.0010927866569545586, "clip_ratio/low_mean": 0.0008930186013458297, "clip_ratio/low_min": 6.541984748764662e-05, "clip_ratio/region_mean": 0.001985805298318155, "epoch": 0.5039370078740157, "grad_norm": 0.11740852892398834, "learning_rate": 1e-06, "loss": 0.0124, "step": 108 }, { "clip_ratio/high_max": 0.002364410935115302, "clip_ratio/high_mean": 0.0009945115180016728, "clip_ratio/low_mean": 0.0010006824450101703, "clip_ratio/low_min": 0.00011146509223181056, "clip_ratio/region_mean": 0.001995193975744769, "epoch": 0.5086030912802566, "grad_norm": 0.11474307626485825, "learning_rate": 1e-06, "loss": 0.027, "step": 109 }, { "clip_ratio/high_max": 0.0021619849212584086, "clip_ratio/high_mean": 0.0009646038633945864, "clip_ratio/low_mean": 0.00101947577422834, "clip_ratio/low_min": 0.00013564843538915738, "clip_ratio/region_mean": 0.001984079666726757, "epoch": 0.5132691746864976, "grad_norm": 0.12327814102172852, "learning_rate": 1e-06, "loss": 0.0551, "step": 110 }, { "clip_ratio/high_max": 0.002472227926773485, "clip_ratio/high_mean": 0.0011939582182094455, "clip_ratio/low_mean": 0.0009464238028158434, "clip_ratio/low_min": 0.00010620513876347104, "clip_ratio/region_mean": 0.0021403820792329498, "epoch": 0.5179352580927384, "grad_norm": 0.1299416869878769, "learning_rate": 1e-06, "loss": -0.0144, "step": 111 }, { "clip_ratio/high_max": 0.002420767195872031, "clip_ratio/high_mean": 0.0010285847820341587, "clip_ratio/low_mean": 0.000996541153654107, "clip_ratio/low_min": 0.00016540102842554916, "clip_ratio/region_mean": 0.002025125941145234, "epoch": 0.5226013414989793, "grad_norm": 0.1284252405166626, "learning_rate": 1e-06, "loss": 0.0276, "step": 112 }, { "clip_ratio/high_max": 0.0030303806852316484, "clip_ratio/high_mean": 0.0013427077501546592, "clip_ratio/low_mean": 0.001215070753460168, "clip_ratio/low_min": 5.0016868954116944e-05, "clip_ratio/region_mean": 0.002557778549089562, "epoch": 0.5272674249052202, "grad_norm": 0.11351756751537323, "learning_rate": 1e-06, "loss": 0.0357, "step": 113 }, { "clip_ratio/high_max": 0.004002993715403136, "clip_ratio/high_mean": 0.0015609030233463272, "clip_ratio/low_mean": 0.001315450201218482, "clip_ratio/low_min": 0.00018036401706922334, "clip_ratio/region_mean": 0.0028763532391167246, "epoch": 0.531933508311461, "grad_norm": 0.12259995937347412, "learning_rate": 1e-06, "loss": -0.0104, "step": 114 }, { "clip_ratio/high_max": 0.00345708731765626, "clip_ratio/high_mean": 0.0013046759195276536, "clip_ratio/low_mean": 0.0011637570751190651, "clip_ratio/low_min": 0.00011631832694547484, "clip_ratio/region_mean": 0.0024684329182491638, "epoch": 0.536599591717702, "grad_norm": 0.11007120460271835, "learning_rate": 1e-06, "loss": -0.0153, "step": 115 }, { "clip_ratio/high_max": 0.002803013594530057, "clip_ratio/high_mean": 0.0012423470107023604, "clip_ratio/low_mean": 0.0014616703265346587, "clip_ratio/low_min": 0.00013409452094492735, "clip_ratio/region_mean": 0.0027040172935812734, "epoch": 0.5412656751239429, "grad_norm": 0.11620628088712692, "learning_rate": 1e-06, "loss": 0.024, "step": 116 }, { "clip_ratio/high_max": 0.003403938375413418, "clip_ratio/high_mean": 0.0014087941308389418, "clip_ratio/low_mean": 0.0012075921622454189, "clip_ratio/low_min": 7.167585863498971e-05, "clip_ratio/region_mean": 0.0026163862421526574, "epoch": 0.5459317585301837, "grad_norm": 0.1281481236219406, "learning_rate": 1e-06, "loss": -0.0548, "step": 117 }, { "clip_ratio/high_max": 0.0035042481977143325, "clip_ratio/high_mean": 0.0014438756406889297, "clip_ratio/low_mean": 0.0013728199410252273, "clip_ratio/low_min": 0.00022288213858701056, "clip_ratio/region_mean": 0.0028166955744381994, "epoch": 0.5505978419364246, "grad_norm": 0.11159789562225342, "learning_rate": 1e-06, "loss": -0.001, "step": 118 }, { "clip_ratio/high_max": 0.002851492405170575, "clip_ratio/high_mean": 0.001359469439194072, "clip_ratio/low_mean": 0.0013244528090581298, "clip_ratio/low_min": 7.965722761582583e-05, "clip_ratio/region_mean": 0.0026839222700800747, "epoch": 0.5552639253426656, "grad_norm": 0.11402370780706406, "learning_rate": 1e-06, "loss": -0.0019, "step": 119 }, { "clip_ratio/high_max": 0.00288427992927609, "clip_ratio/high_mean": 0.0014102475142863113, "clip_ratio/low_mean": 0.0013363800098886713, "clip_ratio/low_min": 7.92927839938784e-05, "clip_ratio/region_mean": 0.00274662759329658, "epoch": 0.5599300087489064, "grad_norm": 0.1175575852394104, "learning_rate": 1e-06, "loss": 0.0274, "step": 120 }, { "clip_ratio/high_max": 0.0030200972105376422, "clip_ratio/high_mean": 0.0014857495116302744, "clip_ratio/low_mean": 0.0013425322867988143, "clip_ratio/low_min": 8.068737861322006e-05, "clip_ratio/region_mean": 0.0028282817220315337, "epoch": 0.5645960921551473, "grad_norm": 0.11188304424285889, "learning_rate": 1e-06, "loss": -0.0448, "step": 121 }, { "clip_ratio/high_max": 0.0032648017731844448, "clip_ratio/high_mean": 0.001307212864048779, "clip_ratio/low_mean": 0.0014480057070613839, "clip_ratio/low_min": 0.00015441594587173313, "clip_ratio/region_mean": 0.00275521854928229, "epoch": 0.5692621755613881, "grad_norm": 0.12039351463317871, "learning_rate": 1e-06, "loss": 0.0311, "step": 122 }, { "clip_ratio/high_max": 0.003350591388880275, "clip_ratio/high_mean": 0.0014234078807930928, "clip_ratio/low_mean": 0.0014780947640247177, "clip_ratio/low_min": 0.00024440218112431467, "clip_ratio/region_mean": 0.002901502579334192, "epoch": 0.573928258967629, "grad_norm": 0.11751885712146759, "learning_rate": 1e-06, "loss": 0.0323, "step": 123 }, { "clip_ratio/high_max": 0.0030487079711747356, "clip_ratio/high_mean": 0.0013791432247671764, "clip_ratio/low_mean": 0.0014340886154968757, "clip_ratio/low_min": 0.00015369544962595683, "clip_ratio/region_mean": 0.002813231825712137, "epoch": 0.57859434237387, "grad_norm": 0.1065361350774765, "learning_rate": 1e-06, "loss": 0.0118, "step": 124 }, { "clip_ratio/high_max": 0.0028889574023196474, "clip_ratio/high_mean": 0.0013628405977215152, "clip_ratio/low_mean": 0.0015056017655297183, "clip_ratio/low_min": 0.0002008831379498588, "clip_ratio/region_mean": 0.0028684423232334666, "epoch": 0.5832604257801108, "grad_norm": 0.10466253012418747, "learning_rate": 1e-06, "loss": 0.0266, "step": 125 }, { "clip_ratio/high_max": 0.002997315699758474, "clip_ratio/high_mean": 0.0013970356740173884, "clip_ratio/low_mean": 0.0016251877023023553, "clip_ratio/low_min": 0.00021143568756087916, "clip_ratio/region_mean": 0.0030222233763197437, "epoch": 0.5879265091863517, "grad_norm": 0.1156531274318695, "learning_rate": 1e-06, "loss": 0.0546, "step": 126 }, { "clip_ratio/high_max": 0.003504602354951203, "clip_ratio/high_mean": 0.0015556918988295365, "clip_ratio/low_mean": 0.0014803831145400181, "clip_ratio/low_min": 0.0001914514014060842, "clip_ratio/region_mean": 0.0030360749224200845, "epoch": 0.5925925925925926, "grad_norm": 0.12074324488639832, "learning_rate": 1e-06, "loss": -0.015, "step": 127 }, { "clip_ratio/high_max": 0.003545554558513686, "clip_ratio/high_mean": 0.0014461224054684862, "clip_ratio/low_mean": 0.0015403323814098258, "clip_ratio/low_min": 0.0003250622494306299, "clip_ratio/region_mean": 0.0029864547614124604, "epoch": 0.5972586759988335, "grad_norm": 0.11859126389026642, "learning_rate": 1e-06, "loss": 0.027, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01708984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 625.4926147460938, "completions/mean_terminated_length": 565.1509399414062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6019247594050744, "grad_norm": 0.12504246830940247, "learning_rate": 1e-06, "loss": -0.0211, "num_tokens": 46360250.0, "reward": 0.5486189126968384, "reward_std": 0.22039179503917694, "rewards/simpleverify_reward/mean": 0.5486188530921936, "rewards/simpleverify_reward/std": 0.4976479411125183, "step": 129 }, { "clip_ratio/high_max": 0.0019622973923105747, "clip_ratio/high_mean": 0.00086254951202136, "clip_ratio/low_mean": 0.000690543945893296, "clip_ratio/low_min": 4.503778018261073e-05, "clip_ratio/region_mean": 0.0015530934651906136, "epoch": 0.6065908428113153, "grad_norm": 0.12330756336450577, "learning_rate": 1e-06, "loss": -0.0256, "step": 130 }, { "clip_ratio/high_max": 0.002415656505036168, "clip_ratio/high_mean": 0.0010426417047710856, "clip_ratio/low_mean": 0.0007739345546724508, "clip_ratio/low_min": 0.0001219256500917254, "clip_ratio/region_mean": 0.0018165762594435364, "epoch": 0.6112569262175561, "grad_norm": 0.14477677643299103, "learning_rate": 1e-06, "loss": 0.0041, "step": 131 }, { "clip_ratio/high_max": 0.002318665814527776, "clip_ratio/high_mean": 0.0010169484485231806, "clip_ratio/low_mean": 0.0007320470376726007, "clip_ratio/low_min": 4.6667485548823606e-05, "clip_ratio/region_mean": 0.0017489955134806223, "epoch": 0.615923009623797, "grad_norm": 0.12318412214517593, "learning_rate": 1e-06, "loss": -0.035, "step": 132 }, { "clip_ratio/high_max": 0.002068623303784989, "clip_ratio/high_mean": 0.0009670791041571647, "clip_ratio/low_mean": 0.0007370205366896698, "clip_ratio/low_min": 8.613667068857467e-05, "clip_ratio/region_mean": 0.0017040996535797603, "epoch": 0.620589093030038, "grad_norm": 0.12779073417186737, "learning_rate": 1e-06, "loss": -0.0049, "step": 133 }, { "clip_ratio/high_max": 0.00234722915047314, "clip_ratio/high_mean": 0.000929608129808912, "clip_ratio/low_mean": 0.0007829385194781935, "clip_ratio/low_min": 8.551259543310152e-05, "clip_ratio/region_mean": 0.0017125465965364128, "epoch": 0.6252551764362788, "grad_norm": 0.1303846538066864, "learning_rate": 1e-06, "loss": 0.0277, "step": 134 }, { "clip_ratio/high_max": 0.002241820649942383, "clip_ratio/high_mean": 0.001005620923024253, "clip_ratio/low_mean": 0.0008878043463482754, "clip_ratio/low_min": 0.00010648371426213998, "clip_ratio/region_mean": 0.0018934252475446556, "epoch": 0.6299212598425197, "grad_norm": 0.12360312789678574, "learning_rate": 1e-06, "loss": 0.0062, "step": 135 }, { "clip_ratio/high_max": 0.0022526702232426032, "clip_ratio/high_mean": 0.0009839833837759215, "clip_ratio/low_mean": 0.0009566756398271536, "clip_ratio/low_min": 9.257162764697568e-05, "clip_ratio/region_mean": 0.001940659036336001, "epoch": 0.6345873432487605, "grad_norm": 0.13168051838874817, "learning_rate": 1e-06, "loss": 0.0231, "step": 136 }, { "clip_ratio/high_max": 0.0017736651498125866, "clip_ratio/high_mean": 0.0008472457593597937, "clip_ratio/low_mean": 0.0008509719627909362, "clip_ratio/low_min": 8.851047914504306e-05, "clip_ratio/region_mean": 0.0016982177185127512, "epoch": 0.6392534266550015, "grad_norm": 0.11745944619178772, "learning_rate": 1e-06, "loss": 0.0464, "step": 137 }, { "clip_ratio/high_max": 0.002357255965762306, "clip_ratio/high_mean": 0.0010218969673587708, "clip_ratio/low_mean": 0.0010448472276038956, "clip_ratio/low_min": 3.768465467146598e-05, "clip_ratio/region_mean": 0.0020667441858677194, "epoch": 0.6439195100612424, "grad_norm": 0.13640879094600677, "learning_rate": 1e-06, "loss": -0.0401, "step": 138 }, { "clip_ratio/high_max": 0.002176973233872559, "clip_ratio/high_mean": 0.0010496407194295898, "clip_ratio/low_mean": 0.0011276936093054246, "clip_ratio/low_min": 0.00010892456339206547, "clip_ratio/region_mean": 0.0021773343105451204, "epoch": 0.6485855934674832, "grad_norm": 0.12636250257492065, "learning_rate": 1e-06, "loss": 0.0194, "step": 139 }, { "clip_ratio/high_max": 0.0022459454557974823, "clip_ratio/high_mean": 0.0009187738578475546, "clip_ratio/low_mean": 0.0011709720110957278, "clip_ratio/low_min": 8.018929383979412e-05, "clip_ratio/region_mean": 0.0020897458816762082, "epoch": 0.6532516768737241, "grad_norm": 0.12697508931159973, "learning_rate": 1e-06, "loss": 0.061, "step": 140 }, { "clip_ratio/high_max": 0.0027129300142405555, "clip_ratio/high_mean": 0.0009579164870956447, "clip_ratio/low_mean": 0.0009603455837350339, "clip_ratio/low_min": 7.704151266807457e-05, "clip_ratio/region_mean": 0.0019182620744686574, "epoch": 0.657917760279965, "grad_norm": 0.12945355474948883, "learning_rate": 1e-06, "loss": 0.0254, "step": 141 }, { "clip_ratio/high_max": 0.002652611816301942, "clip_ratio/high_mean": 0.0011574287018447649, "clip_ratio/low_mean": 0.0009167208190774545, "clip_ratio/low_min": 0.0001130971431848593, "clip_ratio/region_mean": 0.0020741495391121134, "epoch": 0.6625838436862059, "grad_norm": 0.12411902099847794, "learning_rate": 1e-06, "loss": -0.0124, "step": 142 }, { "clip_ratio/high_max": 0.00243718452838948, "clip_ratio/high_mean": 0.001045304125000257, "clip_ratio/low_mean": 0.0010213898131041788, "clip_ratio/low_min": 0.00013535294237954076, "clip_ratio/region_mean": 0.0020666939672082663, "epoch": 0.6672499270924468, "grad_norm": 0.12218952178955078, "learning_rate": 1e-06, "loss": -0.0044, "step": 143 }, { "clip_ratio/high_max": 0.0023179636045824736, "clip_ratio/high_mean": 0.0009743330138007877, "clip_ratio/low_mean": 0.0011671989086607937, "clip_ratio/low_min": 0.00010179436776525108, "clip_ratio/region_mean": 0.0021415319279185496, "epoch": 0.6719160104986877, "grad_norm": 0.12230091542005539, "learning_rate": 1e-06, "loss": 0.0498, "step": 144 }, { "clip_ratio/high_max": 0.0030589635425712913, "clip_ratio/high_mean": 0.0014219260010577273, "clip_ratio/low_mean": 0.001473729033023119, "clip_ratio/low_min": 0.00016825882630655542, "clip_ratio/region_mean": 0.002895655117754359, "epoch": 0.6765820939049285, "grad_norm": 0.11068176478147507, "learning_rate": 1e-06, "loss": -0.0216, "step": 145 }, { "clip_ratio/high_max": 0.0030975044719525613, "clip_ratio/high_mean": 0.0012474586364987772, "clip_ratio/low_mean": 0.0014553499822795857, "clip_ratio/low_min": 0.0002019758057940635, "clip_ratio/region_mean": 0.0027028086478821933, "epoch": 0.6812481773111695, "grad_norm": 0.11317021399736404, "learning_rate": 1e-06, "loss": -0.0261, "step": 146 }, { "clip_ratio/high_max": 0.0031326866810559295, "clip_ratio/high_mean": 0.0014611303740821313, "clip_ratio/low_mean": 0.0016066228454292286, "clip_ratio/low_min": 0.0002908540764110512, "clip_ratio/region_mean": 0.0030677532049594447, "epoch": 0.6859142607174104, "grad_norm": 0.12762829661369324, "learning_rate": 1e-06, "loss": 0.0034, "step": 147 }, { "clip_ratio/high_max": 0.003193265845766291, "clip_ratio/high_mean": 0.0013287888978084084, "clip_ratio/low_mean": 0.0015411545864481013, "clip_ratio/low_min": 0.0001754615113895852, "clip_ratio/region_mean": 0.002869943462428637, "epoch": 0.6905803441236512, "grad_norm": 0.11296442151069641, "learning_rate": 1e-06, "loss": -0.0356, "step": 148 }, { "clip_ratio/high_max": 0.003124746464891359, "clip_ratio/high_mean": 0.001424162890543812, "clip_ratio/low_mean": 0.0016049580262915697, "clip_ratio/low_min": 0.00022178129347594222, "clip_ratio/region_mean": 0.0030291209332062863, "epoch": 0.6952464275298921, "grad_norm": 0.11597266048192978, "learning_rate": 1e-06, "loss": -0.0055, "step": 149 }, { "clip_ratio/high_max": 0.003220578819309594, "clip_ratio/high_mean": 0.0013671701453858986, "clip_ratio/low_mean": 0.0016575887439103099, "clip_ratio/low_min": 0.0002167654711229261, "clip_ratio/region_mean": 0.0030247588583733886, "epoch": 0.6999125109361329, "grad_norm": 0.12387190014123917, "learning_rate": 1e-06, "loss": 0.0271, "step": 150 }, { "clip_ratio/high_max": 0.0032617842007311992, "clip_ratio/high_mean": 0.0013756312982877716, "clip_ratio/low_mean": 0.0018368964811088517, "clip_ratio/low_min": 0.0001695835635473486, "clip_ratio/region_mean": 0.0032125277793966234, "epoch": 0.7045785943423739, "grad_norm": 0.11603932082653046, "learning_rate": 1e-06, "loss": 0.0056, "step": 151 }, { "clip_ratio/high_max": 0.0033011011182679795, "clip_ratio/high_mean": 0.0014124352564977016, "clip_ratio/low_mean": 0.0017514135652163532, "clip_ratio/low_min": 0.0002159983496312634, "clip_ratio/region_mean": 0.0031638488871976733, "epoch": 0.7092446777486148, "grad_norm": 0.1222923994064331, "learning_rate": 1e-06, "loss": 0.0225, "step": 152 }, { "clip_ratio/high_max": 0.0023524437201558612, "clip_ratio/high_mean": 0.001154064571892377, "clip_ratio/low_mean": 0.0015891659058979712, "clip_ratio/low_min": 0.0003527866556396475, "clip_ratio/region_mean": 0.0027432304341346025, "epoch": 0.7139107611548556, "grad_norm": 0.10908897966146469, "learning_rate": 1e-06, "loss": 0.0459, "step": 153 }, { "clip_ratio/high_max": 0.003170895215589553, "clip_ratio/high_mean": 0.001523188933788333, "clip_ratio/low_mean": 0.001526602020021528, "clip_ratio/low_min": 0.00013473693070409354, "clip_ratio/region_mean": 0.0030497910047415644, "epoch": 0.7185768445610965, "grad_norm": 0.12282628566026688, "learning_rate": 1e-06, "loss": -0.0408, "step": 154 }, { "clip_ratio/high_max": 0.0032785448056529276, "clip_ratio/high_mean": 0.0015166443372436333, "clip_ratio/low_mean": 0.0017930999092641287, "clip_ratio/low_min": 0.0002642217732500285, "clip_ratio/region_mean": 0.003309744242869783, "epoch": 0.7232429279673375, "grad_norm": 0.11680693924427032, "learning_rate": 1e-06, "loss": 0.0189, "step": 155 }, { "clip_ratio/high_max": 0.0029522183976951055, "clip_ratio/high_mean": 0.0012086244933016133, "clip_ratio/low_mean": 0.0019024655121029355, "clip_ratio/low_min": 0.00035396035673329607, "clip_ratio/region_mean": 0.0031110900454223156, "epoch": 0.7279090113735783, "grad_norm": 0.1127549484372139, "learning_rate": 1e-06, "loss": 0.0605, "step": 156 }, { "clip_ratio/high_max": 0.003389412297110539, "clip_ratio/high_mean": 0.0013614786530524725, "clip_ratio/low_mean": 0.001620078273845138, "clip_ratio/low_min": 9.173921262117801e-05, "clip_ratio/region_mean": 0.0029815569578204304, "epoch": 0.7325750947798192, "grad_norm": 0.11988050490617752, "learning_rate": 1e-06, "loss": 0.0248, "step": 157 }, { "clip_ratio/high_max": 0.0037293742134352215, "clip_ratio/high_mean": 0.0016134379984578118, "clip_ratio/low_mean": 0.0014948346579330973, "clip_ratio/low_min": 0.000297502941066341, "clip_ratio/region_mean": 0.0031082726345630363, "epoch": 0.73724117818606, "grad_norm": 0.11628247797489166, "learning_rate": 1e-06, "loss": -0.013, "step": 158 }, { "clip_ratio/high_max": 0.0031293387728510424, "clip_ratio/high_mean": 0.0014757179706066381, "clip_ratio/low_mean": 0.0015834566875128075, "clip_ratio/low_min": 0.00027981661150988657, "clip_ratio/region_mean": 0.003059174632653594, "epoch": 0.7419072615923009, "grad_norm": 0.11325806379318237, "learning_rate": 1e-06, "loss": -0.0049, "step": 159 }, { "clip_ratio/high_max": 0.0031949883195920847, "clip_ratio/high_mean": 0.0013877581077395007, "clip_ratio/low_mean": 0.0016084506023616996, "clip_ratio/low_min": 0.00015860439998505171, "clip_ratio/region_mean": 0.002996208677359391, "epoch": 0.7465733449985419, "grad_norm": 0.1116633489727974, "learning_rate": 1e-06, "loss": 0.0492, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016392299107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 618.296142578125, "completions/mean_terminated_length": 560.3385009765625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.7512394284047827, "grad_norm": 0.1300407499074936, "learning_rate": 1e-06, "loss": -0.0128, "num_tokens": 55679407.0, "reward": 0.5765904188156128, "reward_std": 0.212571918964386, "rewards/simpleverify_reward/mean": 0.5765904188156128, "rewards/simpleverify_reward/std": 0.49411630630493164, "step": 161 }, { "clip_ratio/high_max": 0.0022728608528268524, "clip_ratio/high_mean": 0.0008833910596877104, "clip_ratio/low_mean": 0.0006150963381514885, "clip_ratio/low_min": 3.628986542025814e-05, "clip_ratio/region_mean": 0.0014984874069341458, "epoch": 0.7559055118110236, "grad_norm": 0.1208866685628891, "learning_rate": 1e-06, "loss": 0.0051, "step": 162 }, { "clip_ratio/high_max": 0.0024660796552780084, "clip_ratio/high_mean": 0.000990775044556358, "clip_ratio/low_mean": 0.000572200100577902, "clip_ratio/low_min": 2.037744525296148e-05, "clip_ratio/region_mean": 0.0015629751433152705, "epoch": 0.7605715952172645, "grad_norm": 0.1179087832570076, "learning_rate": 1e-06, "loss": 0.004, "step": 163 }, { "clip_ratio/high_max": 0.002314921235665679, "clip_ratio/high_mean": 0.0009412051113031339, "clip_ratio/low_mean": 0.000560982139177213, "clip_ratio/low_min": 3.2642863516230136e-05, "clip_ratio/region_mean": 0.0015021872750367038, "epoch": 0.7652376786235054, "grad_norm": 0.11520962417125702, "learning_rate": 1e-06, "loss": 0.0168, "step": 164 }, { "clip_ratio/high_max": 0.0022047006768843858, "clip_ratio/high_mean": 0.0009568603263687692, "clip_ratio/low_mean": 0.0006946019220777089, "clip_ratio/low_min": 3.6372794056660496e-05, "clip_ratio/region_mean": 0.001651462236623047, "epoch": 0.7699037620297463, "grad_norm": 0.12777192890644073, "learning_rate": 1e-06, "loss": 0.0274, "step": 165 }, { "clip_ratio/high_max": 0.002207837824244052, "clip_ratio/high_mean": 0.0009902433957904577, "clip_ratio/low_mean": 0.0007543263109255349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017445697449147701, "epoch": 0.7745698454359872, "grad_norm": 0.12327396124601364, "learning_rate": 1e-06, "loss": 0.0022, "step": 166 }, { "clip_ratio/high_max": 0.002686720901692752, "clip_ratio/high_mean": 0.001066892669769004, "clip_ratio/low_mean": 0.0007163599202613113, "clip_ratio/low_min": 4.928340968035627e-05, "clip_ratio/region_mean": 0.001783252570021432, "epoch": 0.779235928842228, "grad_norm": 0.1391931027173996, "learning_rate": 1e-06, "loss": -0.0384, "step": 167 }, { "clip_ratio/high_max": 0.002355117438128218, "clip_ratio/high_mean": 0.0010019054825534113, "clip_ratio/low_mean": 0.0008151353649736848, "clip_ratio/low_min": 5.1902492486988194e-05, "clip_ratio/region_mean": 0.0018170408802689053, "epoch": 0.7839020122484689, "grad_norm": 0.1298067271709442, "learning_rate": 1e-06, "loss": 0.0076, "step": 168 }, { "clip_ratio/high_max": 0.0023464256228180602, "clip_ratio/high_mean": 0.0009794815869099693, "clip_ratio/low_mean": 0.0010000048805522965, "clip_ratio/low_min": 0.00012863148094766075, "clip_ratio/region_mean": 0.001979486441996414, "epoch": 0.7885680956547099, "grad_norm": 0.12335332483053207, "learning_rate": 1e-06, "loss": 0.0305, "step": 169 }, { "clip_ratio/high_max": 0.002366124994296115, "clip_ratio/high_mean": 0.001040869035932701, "clip_ratio/low_mean": 0.0009606007988622878, "clip_ratio/low_min": 7.547083623649087e-05, "clip_ratio/region_mean": 0.00200146983115701, "epoch": 0.7932341790609507, "grad_norm": 0.13561807572841644, "learning_rate": 1e-06, "loss": -0.0016, "step": 170 }, { "clip_ratio/high_max": 0.0028863431507488713, "clip_ratio/high_mean": 0.0011377375631127506, "clip_ratio/low_mean": 0.0008200303054763936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001957767817657441, "epoch": 0.7979002624671916, "grad_norm": 0.131186380982399, "learning_rate": 1e-06, "loss": -0.0254, "step": 171 }, { "clip_ratio/high_max": 0.0023395269599859603, "clip_ratio/high_mean": 0.0010299486420990434, "clip_ratio/low_mean": 0.0010231237029074691, "clip_ratio/low_min": 0.000147094736348663, "clip_ratio/region_mean": 0.0020530723777483217, "epoch": 0.8025663458734325, "grad_norm": 0.1217319518327713, "learning_rate": 1e-06, "loss": 0.0084, "step": 172 }, { "clip_ratio/high_max": 0.0019382450191187672, "clip_ratio/high_mean": 0.0008459129167022184, "clip_ratio/low_mean": 0.0009874254337773891, "clip_ratio/low_min": 0.00013877356923330808, "clip_ratio/region_mean": 0.0018333383268327452, "epoch": 0.8072324292796734, "grad_norm": 0.11738027632236481, "learning_rate": 1e-06, "loss": 0.0268, "step": 173 }, { "clip_ratio/high_max": 0.0021850022894795984, "clip_ratio/high_mean": 0.0011033032988052582, "clip_ratio/low_mean": 0.001106167743273545, "clip_ratio/low_min": 0.00013648305775859626, "clip_ratio/region_mean": 0.002209471058449708, "epoch": 0.8118985126859143, "grad_norm": 0.12672440707683563, "learning_rate": 1e-06, "loss": -0.0125, "step": 174 }, { "clip_ratio/high_max": 0.0024052742883213796, "clip_ratio/high_mean": 0.0010238092763756867, "clip_ratio/low_mean": 0.0010055394959636033, "clip_ratio/low_min": 7.179893691500183e-05, "clip_ratio/region_mean": 0.002029348775977269, "epoch": 0.8165645960921551, "grad_norm": 0.12982292473316193, "learning_rate": 1e-06, "loss": 0.0364, "step": 175 }, { "clip_ratio/high_max": 0.002506316697690636, "clip_ratio/high_mean": 0.0010294234416505788, "clip_ratio/low_mean": 0.0009139028879872058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019433263296377845, "epoch": 0.821230679498396, "grad_norm": 0.12429260462522507, "learning_rate": 1e-06, "loss": 0.0124, "step": 176 }, { "clip_ratio/high_max": 0.003091856327955611, "clip_ratio/high_mean": 0.001399352629960049, "clip_ratio/low_mean": 0.0013897682147216983, "clip_ratio/low_min": 0.0001232874856214039, "clip_ratio/region_mean": 0.002789120793750044, "epoch": 0.8258967629046369, "grad_norm": 0.11247675120830536, "learning_rate": 1e-06, "loss": -0.0133, "step": 177 }, { "clip_ratio/high_max": 0.0028565432658069767, "clip_ratio/high_mean": 0.0012777550236933166, "clip_ratio/low_mean": 0.001388784894515993, "clip_ratio/low_min": 0.0001572829769429518, "clip_ratio/region_mean": 0.002666539869096596, "epoch": 0.8305628463108778, "grad_norm": 0.10792485624551773, "learning_rate": 1e-06, "loss": 0.0045, "step": 178 }, { "clip_ratio/high_max": 0.003086590048042126, "clip_ratio/high_mean": 0.001362356255413033, "clip_ratio/low_mean": 0.0012803557874576654, "clip_ratio/low_min": 4.900712337985169e-05, "clip_ratio/region_mean": 0.0026427120246808045, "epoch": 0.8352289297171187, "grad_norm": 0.10708758980035782, "learning_rate": 1e-06, "loss": 0.0035, "step": 179 }, { "clip_ratio/high_max": 0.0027207307794014923, "clip_ratio/high_mean": 0.0012383792745822575, "clip_ratio/low_mean": 0.0011612797788984608, "clip_ratio/low_min": 6.0781432694057e-05, "clip_ratio/region_mean": 0.002399659075308591, "epoch": 0.8398950131233596, "grad_norm": 0.1075570285320282, "learning_rate": 1e-06, "loss": 0.0163, "step": 180 }, { "clip_ratio/high_max": 0.0028585142354131676, "clip_ratio/high_mean": 0.0013736776054429356, "clip_ratio/low_mean": 0.001392723228491377, "clip_ratio/low_min": 0.00010873119026655331, "clip_ratio/region_mean": 0.002766400830296334, "epoch": 0.8445610965296004, "grad_norm": 0.11319371312856674, "learning_rate": 1e-06, "loss": 0.0268, "step": 181 }, { "clip_ratio/high_max": 0.00307430540851783, "clip_ratio/high_mean": 0.0013480396373779513, "clip_ratio/low_mean": 0.0013493507303792285, "clip_ratio/low_min": 0.00010216513510385994, "clip_ratio/region_mean": 0.002697390373214148, "epoch": 0.8492271799358414, "grad_norm": 0.11054737865924835, "learning_rate": 1e-06, "loss": 0.0016, "step": 182 }, { "clip_ratio/high_max": 0.003473888740700204, "clip_ratio/high_mean": 0.0015409789848490618, "clip_ratio/low_mean": 0.0013487565811374225, "clip_ratio/low_min": 0.00019729429550352506, "clip_ratio/region_mean": 0.0028897355587105267, "epoch": 0.8538932633420823, "grad_norm": 0.11917661875486374, "learning_rate": 1e-06, "loss": -0.0392, "step": 183 }, { "clip_ratio/high_max": 0.0032659345015417784, "clip_ratio/high_mean": 0.0013825950372847728, "clip_ratio/low_mean": 0.0016036020388128236, "clip_ratio/low_min": 0.000264601141680032, "clip_ratio/region_mean": 0.0029861971124773845, "epoch": 0.8585593467483231, "grad_norm": 0.11535173654556274, "learning_rate": 1e-06, "loss": 0.0069, "step": 184 }, { "clip_ratio/high_max": 0.003165465190249961, "clip_ratio/high_mean": 0.0014062658046896104, "clip_ratio/low_mean": 0.0016643798517179675, "clip_ratio/low_min": 0.0002641809987835586, "clip_ratio/region_mean": 0.003070645601837896, "epoch": 0.863225430154564, "grad_norm": 0.11451252549886703, "learning_rate": 1e-06, "loss": 0.0299, "step": 185 }, { "clip_ratio/high_max": 0.003231565046007745, "clip_ratio/high_mean": 0.0015055474432301708, "clip_ratio/low_mean": 0.0017203881870955229, "clip_ratio/low_min": 0.00010887846838159021, "clip_ratio/region_mean": 0.0032259356885333546, "epoch": 0.8678915135608049, "grad_norm": 0.12903904914855957, "learning_rate": 1e-06, "loss": -0.0023, "step": 186 }, { "clip_ratio/high_max": 0.0038990563771221787, "clip_ratio/high_mean": 0.0016840885873534717, "clip_ratio/low_mean": 0.00150715892232256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031912474805722013, "epoch": 0.8725575969670458, "grad_norm": 0.12221875786781311, "learning_rate": 1e-06, "loss": -0.0261, "step": 187 }, { "clip_ratio/high_max": 0.0031884782074484974, "clip_ratio/high_mean": 0.0014102132518019062, "clip_ratio/low_mean": 0.0016071983118308708, "clip_ratio/low_min": 0.0002617763111629756, "clip_ratio/region_mean": 0.0030174115672707558, "epoch": 0.8772236803732867, "grad_norm": 0.11409705877304077, "learning_rate": 1e-06, "loss": 0.0079, "step": 188 }, { "clip_ratio/high_max": 0.003140487940981984, "clip_ratio/high_mean": 0.0013090760439808946, "clip_ratio/low_mean": 0.0016345995718438644, "clip_ratio/low_min": 0.0002664353369254968, "clip_ratio/region_mean": 0.0029436755867209285, "epoch": 0.8818897637795275, "grad_norm": 0.10862597823143005, "learning_rate": 1e-06, "loss": 0.0263, "step": 189 }, { "clip_ratio/high_max": 0.003256008174503222, "clip_ratio/high_mean": 0.0014727026427863166, "clip_ratio/low_mean": 0.0016130155454447959, "clip_ratio/low_min": 0.00019578502906369977, "clip_ratio/region_mean": 0.0030857182428007945, "epoch": 0.8865558471857684, "grad_norm": 0.11458273977041245, "learning_rate": 1e-06, "loss": -0.0131, "step": 190 }, { "clip_ratio/high_max": 0.0038847233226988465, "clip_ratio/high_mean": 0.0014843351746094413, "clip_ratio/low_mean": 0.0016515007009729743, "clip_ratio/low_min": 0.00015455462016689125, "clip_ratio/region_mean": 0.003135835941066034, "epoch": 0.8912219305920094, "grad_norm": 0.12143843621015549, "learning_rate": 1e-06, "loss": 0.0358, "step": 191 }, { "clip_ratio/high_max": 0.0037288971070665866, "clip_ratio/high_mean": 0.0016110568758449517, "clip_ratio/low_mean": 0.0014498142299999017, "clip_ratio/low_min": 5.2418817176658195e-05, "clip_ratio/region_mean": 0.0030608711240347475, "epoch": 0.8958880139982502, "grad_norm": 0.11275072395801544, "learning_rate": 1e-06, "loss": 0.0118, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020298549107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 628.046875, "completions/mean_terminated_length": 556.1939086914062, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 1.0046660834062409, "grad_norm": 0.11700253933668137, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 64943399.0, "reward": 0.5739397406578064, "reward_std": 0.20258471369743347, "rewards/simpleverify_reward/mean": 0.5739397406578064, "rewards/simpleverify_reward/std": 0.4945199489593506, "step": 193 }, { "clip_ratio/high_max": 0.001992187782889232, "clip_ratio/high_mean": 0.0008438729164481629, "clip_ratio/low_mean": 0.0006071827938285423, "clip_ratio/low_min": 1.5078408068802673e-05, "clip_ratio/region_mean": 0.0014510557084577158, "epoch": 1.0093321668124817, "grad_norm": 0.11676914244890213, "learning_rate": 1e-06, "loss": 0.0175, "step": 194 }, { "clip_ratio/high_max": 0.00222089902672451, "clip_ratio/high_mean": 0.0009028679196489975, "clip_ratio/low_mean": 0.0005745327225668007, "clip_ratio/low_min": 4.805685330211418e-05, "clip_ratio/region_mean": 0.0014774006413063034, "epoch": 1.0139982502187226, "grad_norm": 0.13004937767982483, "learning_rate": 1e-06, "loss": -0.0213, "step": 195 }, { "clip_ratio/high_max": 0.0019888310998794623, "clip_ratio/high_mean": 0.0008023303998925257, "clip_ratio/low_mean": 0.0005680742769982317, "clip_ratio/low_min": 3.028301398444455e-05, "clip_ratio/region_mean": 0.0013704047014471143, "epoch": 1.0186643336249634, "grad_norm": 0.12435458600521088, "learning_rate": 1e-06, "loss": 0.0099, "step": 196 }, { "clip_ratio/high_max": 0.002000114789552754, "clip_ratio/high_mean": 0.0009112575116887456, "clip_ratio/low_mean": 0.0005940267574260361, "clip_ratio/low_min": 4.376520973892184e-05, "clip_ratio/region_mean": 0.0015052842791192234, "epoch": 1.0233304170312045, "grad_norm": 0.13317565619945526, "learning_rate": 1e-06, "loss": -0.0231, "step": 197 }, { "clip_ratio/high_max": 0.0018851126296794973, "clip_ratio/high_mean": 0.0007911962311482057, "clip_ratio/low_mean": 0.0007478209172404604, "clip_ratio/low_min": 6.730256882292451e-05, "clip_ratio/region_mean": 0.001539017179311486, "epoch": 1.0279965004374454, "grad_norm": 0.12191758304834366, "learning_rate": 1e-06, "loss": 0.0544, "step": 198 }, { "clip_ratio/high_max": 0.002169060222513508, "clip_ratio/high_mean": 0.0010109215945703909, "clip_ratio/low_mean": 0.0007944734552438604, "clip_ratio/low_min": 5.708780827262672e-05, "clip_ratio/region_mean": 0.001805395026167389, "epoch": 1.0326625838436863, "grad_norm": 0.12367334961891174, "learning_rate": 1e-06, "loss": -0.0171, "step": 199 }, { "clip_ratio/high_max": 0.002637896985106636, "clip_ratio/high_mean": 0.001022678567096591, "clip_ratio/low_mean": 0.000851700087878271, "clip_ratio/low_min": 5.934360342507716e-05, "clip_ratio/region_mean": 0.0018743786931736395, "epoch": 1.0373286672499271, "grad_norm": 0.1231176033616066, "learning_rate": 1e-06, "loss": 0.0116, "step": 200 }, { "clip_ratio/high_max": 0.00252491715218639, "clip_ratio/high_mean": 0.001009621500998037, "clip_ratio/low_mean": 0.0008673445890963194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018769660746329464, "epoch": 1.041994750656168, "grad_norm": 0.1276102066040039, "learning_rate": 1e-06, "loss": -0.0234, "step": 201 }, { "clip_ratio/high_max": 0.0021756897986051627, "clip_ratio/high_mean": 0.00102547588176094, "clip_ratio/low_mean": 0.0009182617504848167, "clip_ratio/low_min": 6.389906593540218e-05, "clip_ratio/region_mean": 0.00194373768317746, "epoch": 1.0466608340624088, "grad_norm": 0.12504766881465912, "learning_rate": 1e-06, "loss": -0.0107, "step": 202 }, { "clip_ratio/high_max": 0.002259956090711057, "clip_ratio/high_mean": 0.0010174774397455622, "clip_ratio/low_mean": 0.0010540294388192706, "clip_ratio/low_min": 6.897195817145985e-05, "clip_ratio/region_mean": 0.002071506853098981, "epoch": 1.0513269174686497, "grad_norm": 0.11805891990661621, "learning_rate": 1e-06, "loss": -0.0034, "step": 203 }, { "clip_ratio/high_max": 0.0021944052423350513, "clip_ratio/high_mean": 0.0009023564562085085, "clip_ratio/low_mean": 0.001000316085992381, "clip_ratio/low_min": 0.00016303732536471216, "clip_ratio/region_mean": 0.00190267257130472, "epoch": 1.0559930008748906, "grad_norm": 0.12581489980220795, "learning_rate": 1e-06, "loss": 0.0283, "step": 204 }, { "clip_ratio/high_max": 0.002338005848287139, "clip_ratio/high_mean": 0.0010561580311332364, "clip_ratio/low_mean": 0.0011566291759663727, "clip_ratio/low_min": 0.00016311892795783933, "clip_ratio/region_mean": 0.002212787207099609, "epoch": 1.0606590842811314, "grad_norm": 0.14355507493019104, "learning_rate": 1e-06, "loss": 0.0297, "step": 205 }, { "clip_ratio/high_max": 0.0021115864437888376, "clip_ratio/high_mean": 0.0009700281007098965, "clip_ratio/low_mean": 0.0009522005675535183, "clip_ratio/low_min": 7.406244185403921e-05, "clip_ratio/region_mean": 0.0019222286355216056, "epoch": 1.0653251676873725, "grad_norm": 0.12336955219507217, "learning_rate": 1e-06, "loss": 0.0089, "step": 206 }, { "clip_ratio/high_max": 0.002382258498982992, "clip_ratio/high_mean": 0.0010135391930816695, "clip_ratio/low_mean": 0.0010086361144203693, "clip_ratio/low_min": 3.1394937650475185e-05, "clip_ratio/region_mean": 0.0020221753511577845, "epoch": 1.0699912510936134, "grad_norm": 0.1214800477027893, "learning_rate": 1e-06, "loss": -0.0107, "step": 207 }, { "clip_ratio/high_max": 0.002250693134556059, "clip_ratio/high_mean": 0.0010060775239253417, "clip_ratio/low_mean": 0.000985951299298904, "clip_ratio/low_min": 7.19038780516712e-05, "clip_ratio/region_mean": 0.0019920288177672774, "epoch": 1.0746573344998542, "grad_norm": 0.11997034400701523, "learning_rate": 1e-06, "loss": -0.0186, "step": 208 }, { "clip_ratio/high_max": 0.0029026772317592986, "clip_ratio/high_mean": 0.0013477431457431521, "clip_ratio/low_mean": 0.0014825363869022112, "clip_ratio/low_min": 7.112406728992937e-05, "clip_ratio/region_mean": 0.002830279423505999, "epoch": 1.079323417906095, "grad_norm": 0.10266873240470886, "learning_rate": 1e-06, "loss": 0.0053, "step": 209 }, { "clip_ratio/high_max": 0.002884775363781955, "clip_ratio/high_mean": 0.0012554074091895018, "clip_ratio/low_mean": 0.0013781034140265547, "clip_ratio/low_min": 8.515732315572677e-05, "clip_ratio/region_mean": 0.0026335107686463743, "epoch": 1.083989501312336, "grad_norm": 0.10379394143819809, "learning_rate": 1e-06, "loss": 0.0169, "step": 210 }, { "clip_ratio/high_max": 0.0034519216424087062, "clip_ratio/high_mean": 0.0014074015489313751, "clip_ratio/low_mean": 0.0014037186228961218, "clip_ratio/low_min": 0.00019507929209794383, "clip_ratio/region_mean": 0.0028111201463616453, "epoch": 1.0886555847185768, "grad_norm": 0.1156601682305336, "learning_rate": 1e-06, "loss": -0.022, "step": 211 }, { "clip_ratio/high_max": 0.003109123594185803, "clip_ratio/high_mean": 0.0012554195054690354, "clip_ratio/low_mean": 0.0012679088504228275, "clip_ratio/low_min": 2.6870164219872095e-05, "clip_ratio/region_mean": 0.002523328315874096, "epoch": 1.0933216681248177, "grad_norm": 0.10980050265789032, "learning_rate": 1e-06, "loss": 0.0093, "step": 212 }, { "clip_ratio/high_max": 0.0031063580099726096, "clip_ratio/high_mean": 0.001376570260617882, "clip_ratio/low_mean": 0.0013982853379275184, "clip_ratio/low_min": 0.0001909848433570005, "clip_ratio/region_mean": 0.0027748556021833792, "epoch": 1.0979877515310585, "grad_norm": 0.12302590161561966, "learning_rate": 1e-06, "loss": -0.0238, "step": 213 }, { "clip_ratio/high_max": 0.0028415551787475124, "clip_ratio/high_mean": 0.0012036114148941124, "clip_ratio/low_mean": 0.0016315546963596717, "clip_ratio/low_min": 0.00012248194798303302, "clip_ratio/region_mean": 0.0028351660730550066, "epoch": 1.1026538349372994, "grad_norm": 0.11040081828832626, "learning_rate": 1e-06, "loss": 0.0538, "step": 214 }, { "clip_ratio/high_max": 0.003454074321780354, "clip_ratio/high_mean": 0.0014956598315620795, "clip_ratio/low_mean": 0.0014544298501277808, "clip_ratio/low_min": 0.00010792318607855123, "clip_ratio/region_mean": 0.002950089728983585, "epoch": 1.1073199183435405, "grad_norm": 0.11797984689474106, "learning_rate": 1e-06, "loss": -0.0177, "step": 215 }, { "clip_ratio/high_max": 0.003867048319079913, "clip_ratio/high_mean": 0.0014562339565600269, "clip_ratio/low_mean": 0.001637990069866646, "clip_ratio/low_min": 0.0001992618836084148, "clip_ratio/region_mean": 0.0030942239827709273, "epoch": 1.1119860017497813, "grad_norm": 0.11448762565851212, "learning_rate": 1e-06, "loss": 0.011, "step": 216 }, { "clip_ratio/high_max": 0.0038020240754121915, "clip_ratio/high_mean": 0.0014951990324334474, "clip_ratio/low_mean": 0.0014947549425414763, "clip_ratio/low_min": 1.8848009858629666e-05, "clip_ratio/region_mean": 0.002989953907672316, "epoch": 1.1166520851560222, "grad_norm": 0.11441394686698914, "learning_rate": 1e-06, "loss": -0.024, "step": 217 }, { "clip_ratio/high_max": 0.003183315522619523, "clip_ratio/high_mean": 0.0015579840255668387, "clip_ratio/low_mean": 0.001578374947712291, "clip_ratio/low_min": 8.262306801043451e-05, "clip_ratio/region_mean": 0.003136359024210833, "epoch": 1.121318168562263, "grad_norm": 0.10988708585500717, "learning_rate": 1e-06, "loss": -0.0113, "step": 218 }, { "clip_ratio/high_max": 0.003523297971696593, "clip_ratio/high_mean": 0.0015035541327961255, "clip_ratio/low_mean": 0.001617081888980465, "clip_ratio/low_min": 0.00021564741837210022, "clip_ratio/region_mean": 0.0031206360072246753, "epoch": 1.125984251968504, "grad_norm": 0.1102115735411644, "learning_rate": 1e-06, "loss": -0.0041, "step": 219 }, { "clip_ratio/high_max": 0.00301249188487418, "clip_ratio/high_mean": 0.0013081325560051482, "clip_ratio/low_mean": 0.0017725659417919815, "clip_ratio/low_min": 0.000280503664725984, "clip_ratio/region_mean": 0.0030806985159870237, "epoch": 1.1306503353747448, "grad_norm": 0.11126233637332916, "learning_rate": 1e-06, "loss": 0.0277, "step": 220 }, { "clip_ratio/high_max": 0.003518074328894727, "clip_ratio/high_mean": 0.001540239883979666, "clip_ratio/low_mean": 0.0019393683578527998, "clip_ratio/low_min": 0.0002625341985549312, "clip_ratio/region_mean": 0.003479608232737519, "epoch": 1.1353164187809857, "grad_norm": 0.1257893145084381, "learning_rate": 1e-06, "loss": 0.0288, "step": 221 }, { "clip_ratio/high_max": 0.00316535220190417, "clip_ratio/high_mean": 0.0014314170111902058, "clip_ratio/low_mean": 0.0015908013228909113, "clip_ratio/low_min": 0.0001344400443485938, "clip_ratio/region_mean": 0.003022218370460905, "epoch": 1.1399825021872265, "grad_norm": 0.11075523495674133, "learning_rate": 1e-06, "loss": 0.0084, "step": 222 }, { "clip_ratio/high_max": 0.003336296314955689, "clip_ratio/high_mean": 0.0014599908281525131, "clip_ratio/low_mean": 0.0015942298668960575, "clip_ratio/low_min": 4.5674279135710094e-05, "clip_ratio/region_mean": 0.0030542206950485706, "epoch": 1.1446485855934676, "grad_norm": 0.11315368115901947, "learning_rate": 1e-06, "loss": -0.0113, "step": 223 }, { "clip_ratio/high_max": 0.0034475962165743113, "clip_ratio/high_mean": 0.0014397078775800765, "clip_ratio/low_mean": 0.0014908503944752738, "clip_ratio/low_min": 8.294137023767689e-05, "clip_ratio/region_mean": 0.002930558235675562, "epoch": 1.1493146689997085, "grad_norm": 0.11203787475824356, "learning_rate": 1e-06, "loss": -0.0192, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020856584821428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 630.7854614257812, "completions/mean_terminated_length": 556.973388671875, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.1539807524059493, "grad_norm": 0.13044968247413635, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 74183443.0, "reward": 0.5831473469734192, "reward_std": 0.20123086869716644, "rewards/simpleverify_reward/mean": 0.5831473469734192, "rewards/simpleverify_reward/std": 0.4930552542209625, "step": 225 }, { "clip_ratio/high_max": 0.0024373836640734226, "clip_ratio/high_mean": 0.0009342386965727201, "clip_ratio/low_mean": 0.0006380900795193156, "clip_ratio/low_min": 1.9373837858438492e-05, "clip_ratio/region_mean": 0.0015723287651780993, "epoch": 1.1586468358121902, "grad_norm": 0.1441110074520111, "learning_rate": 1e-06, "loss": -0.0016, "step": 226 }, { "clip_ratio/high_max": 0.0018679511122172698, "clip_ratio/high_mean": 0.0007531731098424643, "clip_ratio/low_mean": 0.0005514680660780868, "clip_ratio/low_min": 3.414710045035463e-05, "clip_ratio/region_mean": 0.0013046411768300459, "epoch": 1.163312919218431, "grad_norm": 0.11731994152069092, "learning_rate": 1e-06, "loss": -0.0113, "step": 227 }, { "clip_ratio/high_max": 0.0016853392771736253, "clip_ratio/high_mean": 0.0007521622155763907, "clip_ratio/low_mean": 0.0007140377692849142, "clip_ratio/low_min": 7.654259661649121e-05, "clip_ratio/region_mean": 0.001466200003051199, "epoch": 1.167979002624672, "grad_norm": 0.129197895526886, "learning_rate": 1e-06, "loss": 0.0722, "step": 228 }, { "clip_ratio/high_max": 0.001995598682697164, "clip_ratio/high_mean": 0.0008241361783802859, "clip_ratio/low_mean": 0.0006181895896588685, "clip_ratio/low_min": 6.0692511397064663e-05, "clip_ratio/region_mean": 0.0014423257962334901, "epoch": 1.1726450860309128, "grad_norm": 0.12741993367671967, "learning_rate": 1e-06, "loss": 0.0191, "step": 229 }, { "clip_ratio/high_max": 0.0019897380007023457, "clip_ratio/high_mean": 0.0007997162192623364, "clip_ratio/low_mean": 0.0007062193435558584, "clip_ratio/low_min": 0.00014848026694380678, "clip_ratio/region_mean": 0.001505935568275163, "epoch": 1.1773111694371536, "grad_norm": 0.12224078178405762, "learning_rate": 1e-06, "loss": 0.0392, "step": 230 }, { "clip_ratio/high_max": 0.0022919785260455683, "clip_ratio/high_mean": 0.0010403427004348487, "clip_ratio/low_mean": 0.0006667175257462077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017070602261810564, "epoch": 1.1819772528433945, "grad_norm": 0.12054876983165741, "learning_rate": 1e-06, "loss": -0.0258, "step": 231 }, { "clip_ratio/high_max": 0.002352491552301217, "clip_ratio/high_mean": 0.0009984478128899354, "clip_ratio/low_mean": 0.0007823707128409296, "clip_ratio/low_min": 4.849775905313436e-05, "clip_ratio/region_mean": 0.0017808185220928863, "epoch": 1.1866433362496354, "grad_norm": 0.1264234185218811, "learning_rate": 1e-06, "loss": -0.0172, "step": 232 }, { "clip_ratio/high_max": 0.0024372233201575, "clip_ratio/high_mean": 0.0009753797385201324, "clip_ratio/low_mean": 0.0009172920572382282, "clip_ratio/low_min": 0.0001293046489081462, "clip_ratio/region_mean": 0.0018926717530121095, "epoch": 1.1913094196558764, "grad_norm": 0.13291189074516296, "learning_rate": 1e-06, "loss": 0.0267, "step": 233 }, { "clip_ratio/high_max": 0.002312093994987663, "clip_ratio/high_mean": 0.0010194439801125554, "clip_ratio/low_mean": 0.0009816762267291779, "clip_ratio/low_min": 5.794309436168987e-05, "clip_ratio/region_mean": 0.0020011202359455638, "epoch": 1.1959755030621173, "grad_norm": 0.13842757046222687, "learning_rate": 1e-06, "loss": -0.0025, "step": 234 }, { "clip_ratio/high_max": 0.0026617083785822615, "clip_ratio/high_mean": 0.0010231079741060967, "clip_ratio/low_mean": 0.0010454754701640923, "clip_ratio/low_min": 4.5284529733180534e-05, "clip_ratio/region_mean": 0.0020685834824689664, "epoch": 1.2006415864683582, "grad_norm": 0.1373901069164276, "learning_rate": 1e-06, "loss": 0.0178, "step": 235 }, { "clip_ratio/high_max": 0.0023979547040653415, "clip_ratio/high_mean": 0.0010478047333890572, "clip_ratio/low_mean": 0.0009897357194859069, "clip_ratio/low_min": 4.037210419483017e-05, "clip_ratio/region_mean": 0.0020375404419610277, "epoch": 1.205307669874599, "grad_norm": 0.10937754809856415, "learning_rate": 1e-06, "loss": -0.0111, "step": 236 }, { "clip_ratio/high_max": 0.0026427910634083673, "clip_ratio/high_mean": 0.0010993979885824956, "clip_ratio/low_mean": 0.0009976516230381094, "clip_ratio/low_min": 7.286989603016991e-05, "clip_ratio/region_mean": 0.002097049633448478, "epoch": 1.20997375328084, "grad_norm": 0.12430103123188019, "learning_rate": 1e-06, "loss": 0.0004, "step": 237 }, { "clip_ratio/high_max": 0.002406152125331573, "clip_ratio/high_mean": 0.0010517175251152366, "clip_ratio/low_mean": 0.0009459001103095943, "clip_ratio/low_min": 0.00012347814845270477, "clip_ratio/region_mean": 0.0019976176117779687, "epoch": 1.2146398366870808, "grad_norm": 0.11710339784622192, "learning_rate": 1e-06, "loss": -0.0412, "step": 238 }, { "clip_ratio/high_max": 0.002395392381004058, "clip_ratio/high_mean": 0.0010301377096766373, "clip_ratio/low_mean": 0.0010858089226530865, "clip_ratio/low_min": 7.406985696434276e-05, "clip_ratio/region_mean": 0.0021159466632525437, "epoch": 1.2193059200933216, "grad_norm": 0.12931126356124878, "learning_rate": 1e-06, "loss": 0.0054, "step": 239 }, { "clip_ratio/high_max": 0.0022705463561578654, "clip_ratio/high_mean": 0.0010105919864145108, "clip_ratio/low_mean": 0.0010050178279925603, "clip_ratio/low_min": 4.5728559598501306e-05, "clip_ratio/region_mean": 0.002015609796217177, "epoch": 1.2239720034995625, "grad_norm": 0.11710505932569504, "learning_rate": 1e-06, "loss": 0.0022, "step": 240 }, { "clip_ratio/high_max": 0.0037810509602422826, "clip_ratio/high_mean": 0.0016871471361810109, "clip_ratio/low_mean": 0.001232622227689717, "clip_ratio/low_min": 6.750053580617532e-05, "clip_ratio/region_mean": 0.0029197693656897172, "epoch": 1.2286380869058036, "grad_norm": 0.11273294687271118, "learning_rate": 1e-06, "loss": -0.0586, "step": 241 }, { "clip_ratio/high_max": 0.003505041175230872, "clip_ratio/high_mean": 0.0014232041576178744, "clip_ratio/low_mean": 0.0016376227940781973, "clip_ratio/low_min": 0.00010465596187714254, "clip_ratio/region_mean": 0.0030608269007643685, "epoch": 1.2333041703120444, "grad_norm": 0.12398556619882584, "learning_rate": 1e-06, "loss": -0.0024, "step": 242 }, { "clip_ratio/high_max": 0.003244882362196222, "clip_ratio/high_mean": 0.0012783937017957214, "clip_ratio/low_mean": 0.001400425418978557, "clip_ratio/low_min": 0.00013850336472387426, "clip_ratio/region_mean": 0.0026788191535160877, "epoch": 1.2379702537182853, "grad_norm": 0.10467694699764252, "learning_rate": 1e-06, "loss": -0.0119, "step": 243 }, { "clip_ratio/high_max": 0.0030949080755817704, "clip_ratio/high_mean": 0.0012627633659576531, "clip_ratio/low_mean": 0.0017948421445908025, "clip_ratio/low_min": 0.00030324369254230987, "clip_ratio/region_mean": 0.0030576054559787735, "epoch": 1.2426363371245261, "grad_norm": 0.11333815008401871, "learning_rate": 1e-06, "loss": 0.0715, "step": 244 }, { "clip_ratio/high_max": 0.0029974909775773995, "clip_ratio/high_mean": 0.0012981866748305038, "clip_ratio/low_mean": 0.0014917710614099633, "clip_ratio/low_min": 0.00015549595082120504, "clip_ratio/region_mean": 0.002789957681670785, "epoch": 1.247302420530767, "grad_norm": 0.1137121319770813, "learning_rate": 1e-06, "loss": 0.0185, "step": 245 }, { "clip_ratio/high_max": 0.0029772713023703545, "clip_ratio/high_mean": 0.0012945088965352625, "clip_ratio/low_mean": 0.0016020713519537821, "clip_ratio/low_min": 0.00036996435846958775, "clip_ratio/region_mean": 0.002896580277592875, "epoch": 1.2519685039370079, "grad_norm": 0.10847195982933044, "learning_rate": 1e-06, "loss": 0.0386, "step": 246 }, { "clip_ratio/high_max": 0.0037965277806506492, "clip_ratio/high_mean": 0.0016158244143298361, "clip_ratio/low_mean": 0.0014017973480804358, "clip_ratio/low_min": 1.8447461116011254e-05, "clip_ratio/region_mean": 0.0030176217696862295, "epoch": 1.2566345873432487, "grad_norm": 0.10925734043121338, "learning_rate": 1e-06, "loss": -0.0264, "step": 247 }, { "clip_ratio/high_max": 0.0032282832107739523, "clip_ratio/high_mean": 0.0014799068485444877, "clip_ratio/low_mean": 0.001553031939693028, "clip_ratio/low_min": 4.006410381407477e-05, "clip_ratio/region_mean": 0.0030329387518577278, "epoch": 1.2613006707494896, "grad_norm": 0.11067990958690643, "learning_rate": 1e-06, "loss": -0.0179, "step": 248 }, { "clip_ratio/high_max": 0.0037833037386008073, "clip_ratio/high_mean": 0.0015578158454445656, "clip_ratio/low_mean": 0.0017774082225514576, "clip_ratio/low_min": 0.00024992932321765693, "clip_ratio/region_mean": 0.0033352240279782563, "epoch": 1.2659667541557305, "grad_norm": 0.11833923310041428, "learning_rate": 1e-06, "loss": 0.0258, "step": 249 }, { "clip_ratio/high_max": 0.0030646495142718777, "clip_ratio/high_mean": 0.001412301578966435, "clip_ratio/low_mean": 0.0016754991374909878, "clip_ratio/low_min": 0.00018463748347130604, "clip_ratio/region_mean": 0.003087800709181465, "epoch": 1.2706328375619713, "grad_norm": 0.12140277028083801, "learning_rate": 1e-06, "loss": -0.0031, "step": 250 }, { "clip_ratio/high_max": 0.003906381083652377, "clip_ratio/high_mean": 0.001518819794000592, "clip_ratio/low_mean": 0.0017400763535988517, "clip_ratio/low_min": 0.00014918321903678589, "clip_ratio/region_mean": 0.0032588961912551895, "epoch": 1.2752989209682122, "grad_norm": 0.11883580684661865, "learning_rate": 1e-06, "loss": 0.017, "step": 251 }, { "clip_ratio/high_max": 0.0037435776393976994, "clip_ratio/high_mean": 0.0015063137616380118, "clip_ratio/low_mean": 0.0014569994200428482, "clip_ratio/low_min": 0.00014257947987061925, "clip_ratio/region_mean": 0.0029633131925947964, "epoch": 1.2799650043744533, "grad_norm": 0.10138453543186188, "learning_rate": 1e-06, "loss": -0.0117, "step": 252 }, { "clip_ratio/high_max": 0.0032856406323844567, "clip_ratio/high_mean": 0.0014929415192455053, "clip_ratio/low_mean": 0.0015651887915737461, "clip_ratio/low_min": 6.695614501950331e-05, "clip_ratio/region_mean": 0.0030581303435610607, "epoch": 1.2846310877806941, "grad_norm": 0.11209844052791595, "learning_rate": 1e-06, "loss": -0.0003, "step": 253 }, { "clip_ratio/high_max": 0.003584752899769228, "clip_ratio/high_mean": 0.0015977217735780869, "clip_ratio/low_mean": 0.0014488598208117764, "clip_ratio/low_min": 0.00017075835057767108, "clip_ratio/region_mean": 0.0030465816234936938, "epoch": 1.289297171186935, "grad_norm": 0.10436536371707916, "learning_rate": 1e-06, "loss": -0.0418, "step": 254 }, { "clip_ratio/high_max": 0.0037288929233909585, "clip_ratio/high_mean": 0.0015298034595616627, "clip_ratio/low_mean": 0.0015732653882878367, "clip_ratio/low_min": 0.0001267130828637164, "clip_ratio/region_mean": 0.0031030688260216266, "epoch": 1.2939632545931758, "grad_norm": 0.11299216747283936, "learning_rate": 1e-06, "loss": 0.0048, "step": 255 }, { "clip_ratio/high_max": 0.003397808046429418, "clip_ratio/high_mean": 0.0015400144511659164, "clip_ratio/low_mean": 0.0016235503462667111, "clip_ratio/low_min": 7.976430606504437e-05, "clip_ratio/region_mean": 0.0031635647173970938, "epoch": 1.2986293379994167, "grad_norm": 0.10357529670000076, "learning_rate": 1e-06, "loss": 0.0016, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023716517857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 650.9552001953125, "completions/mean_terminated_length": 567.265869140625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 1.3032954214056576, "grad_norm": 0.13808058202266693, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 83570808.0, "reward": 0.5787528157234192, "reward_std": 0.1950569897890091, "rewards/simpleverify_reward/mean": 0.5787528157234192, "rewards/simpleverify_reward/std": 0.4937762916088104, "step": 257 }, { "clip_ratio/high_max": 0.0021518862813536543, "clip_ratio/high_mean": 0.0008497496492054779, "clip_ratio/low_mean": 0.0005704966224584496, "clip_ratio/low_min": 3.926059616787825e-05, "clip_ratio/region_mean": 0.0014202462589310016, "epoch": 1.3079615048118984, "grad_norm": 0.1268431395292282, "learning_rate": 1e-06, "loss": 0.0151, "step": 258 }, { "clip_ratio/high_max": 0.001882116968772607, "clip_ratio/high_mean": 0.0008119259218801744, "clip_ratio/low_mean": 0.0005783470151072834, "clip_ratio/low_min": 2.575498729129322e-05, "clip_ratio/region_mean": 0.0013902729187975638, "epoch": 1.3126275882181395, "grad_norm": 0.11980373412370682, "learning_rate": 1e-06, "loss": -0.0265, "step": 259 }, { "clip_ratio/high_max": 0.0019301171923871152, "clip_ratio/high_mean": 0.0008276496791950194, "clip_ratio/low_mean": 0.0006175059224915458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014451555980485864, "epoch": 1.3172936716243804, "grad_norm": 0.12828215956687927, "learning_rate": 1e-06, "loss": 0.0095, "step": 260 }, { "clip_ratio/high_max": 0.0023053980694385245, "clip_ratio/high_mean": 0.0009798652172321454, "clip_ratio/low_mean": 0.0006183355890243547, "clip_ratio/low_min": 1.200076803797856e-05, "clip_ratio/region_mean": 0.001598200818989426, "epoch": 1.3219597550306212, "grad_norm": 0.11807168275117874, "learning_rate": 1e-06, "loss": -0.006, "step": 261 }, { "clip_ratio/high_max": 0.0023316209772019647, "clip_ratio/high_mean": 0.0009486436429142486, "clip_ratio/low_mean": 0.0006555089648827561, "clip_ratio/low_min": 6.467975981649943e-05, "clip_ratio/region_mean": 0.0016041525814216584, "epoch": 1.326625838436862, "grad_norm": 0.12681686878204346, "learning_rate": 1e-06, "loss": 0.0073, "step": 262 }, { "clip_ratio/high_max": 0.0022550920439243782, "clip_ratio/high_mean": 0.0009245580640708795, "clip_ratio/low_mean": 0.0006850120917079039, "clip_ratio/low_min": 3.9433081838069484e-05, "clip_ratio/region_mean": 0.0016095701248559635, "epoch": 1.331291921843103, "grad_norm": 0.1202269196510315, "learning_rate": 1e-06, "loss": -0.0027, "step": 263 }, { "clip_ratio/high_max": 0.002182445976359304, "clip_ratio/high_mean": 0.000812479091109708, "clip_ratio/low_mean": 0.0007228911181300646, "clip_ratio/low_min": 6.683496303594438e-05, "clip_ratio/region_mean": 0.0015353702328866348, "epoch": 1.3359580052493438, "grad_norm": 0.12395063787698746, "learning_rate": 1e-06, "loss": 0.0459, "step": 264 }, { "clip_ratio/high_max": 0.0023663923275307752, "clip_ratio/high_mean": 0.0009941451462509576, "clip_ratio/low_mean": 0.0008251246872532647, "clip_ratio/low_min": 4.236000131641049e-05, "clip_ratio/region_mean": 0.0018192698335042223, "epoch": 1.3406240886555847, "grad_norm": 0.12286343425512314, "learning_rate": 1e-06, "loss": -0.0336, "step": 265 }, { "clip_ratio/high_max": 0.001909790084027918, "clip_ratio/high_mean": 0.0008609516589785926, "clip_ratio/low_mean": 0.0010515750764170662, "clip_ratio/low_min": 0.00010901141831709538, "clip_ratio/region_mean": 0.0019125267208437435, "epoch": 1.3452901720618256, "grad_norm": 0.12255705893039703, "learning_rate": 1e-06, "loss": 0.0392, "step": 266 }, { "clip_ratio/high_max": 0.002582270171842538, "clip_ratio/high_mean": 0.0009708337383926846, "clip_ratio/low_mean": 0.0007798731166985817, "clip_ratio/low_min": 3.863987512886524e-05, "clip_ratio/region_mean": 0.0017507069132989272, "epoch": 1.3499562554680664, "grad_norm": 0.11288341134786606, "learning_rate": 1e-06, "loss": -0.0433, "step": 267 }, { "clip_ratio/high_max": 0.0022060873016016558, "clip_ratio/high_mean": 0.0008841539329296211, "clip_ratio/low_mean": 0.000865750517732522, "clip_ratio/low_min": 4.153243025939446e-05, "clip_ratio/region_mean": 0.0017499044406577013, "epoch": 1.3546223388743073, "grad_norm": 0.1235344186425209, "learning_rate": 1e-06, "loss": -0.0102, "step": 268 }, { "clip_ratio/high_max": 0.0021967909269733354, "clip_ratio/high_mean": 0.0009288905166613404, "clip_ratio/low_mean": 0.0009439619279874023, "clip_ratio/low_min": 9.255539043806493e-05, "clip_ratio/region_mean": 0.001872852459200658, "epoch": 1.3592884222805481, "grad_norm": 0.11238186806440353, "learning_rate": 1e-06, "loss": -0.0242, "step": 269 }, { "clip_ratio/high_max": 0.0021392481721704826, "clip_ratio/high_mean": 0.0009035512339323759, "clip_ratio/low_mean": 0.0011324123552185483, "clip_ratio/low_min": 5.951017010374926e-05, "clip_ratio/region_mean": 0.00203596363280667, "epoch": 1.3639545056867892, "grad_norm": 0.12755899131298065, "learning_rate": 1e-06, "loss": 0.0242, "step": 270 }, { "clip_ratio/high_max": 0.0020534530194709077, "clip_ratio/high_mean": 0.0008245141434599645, "clip_ratio/low_mean": 0.0009495596787019167, "clip_ratio/low_min": 2.5625256967032328e-05, "clip_ratio/region_mean": 0.0017740738039719872, "epoch": 1.36862058909303, "grad_norm": 0.12751242518424988, "learning_rate": 1e-06, "loss": -0.0092, "step": 271 }, { "clip_ratio/high_max": 0.002213423460489139, "clip_ratio/high_mean": 0.0009677669040684123, "clip_ratio/low_mean": 0.0010348346986575052, "clip_ratio/low_min": 9.139573558059055e-05, "clip_ratio/region_mean": 0.0020026016354677267, "epoch": 1.373286672499271, "grad_norm": 0.1233547031879425, "learning_rate": 1e-06, "loss": -0.0087, "step": 272 }, { "clip_ratio/high_max": 0.004143118596402928, "clip_ratio/high_mean": 0.0016997970378724858, "clip_ratio/low_mean": 0.0014228126583475387, "clip_ratio/low_min": 0.00011823543354694266, "clip_ratio/region_mean": 0.0031226096907630563, "epoch": 1.3779527559055118, "grad_norm": 0.11155719310045242, "learning_rate": 1e-06, "loss": 0.0091, "step": 273 }, { "clip_ratio/high_max": 0.003690662757435348, "clip_ratio/high_mean": 0.0013826434142174548, "clip_ratio/low_mean": 0.0013987527127028443, "clip_ratio/low_min": 0.00010228711289528292, "clip_ratio/region_mean": 0.0027813961132778786, "epoch": 1.3826188393117527, "grad_norm": 0.11417661607265472, "learning_rate": 1e-06, "loss": 0.0144, "step": 274 }, { "clip_ratio/high_max": 0.0029901251473347656, "clip_ratio/high_mean": 0.001377975564537337, "clip_ratio/low_mean": 0.0013107542326906696, "clip_ratio/low_min": 7.288961751328316e-05, "clip_ratio/region_mean": 0.0026887298372457735, "epoch": 1.3872849227179935, "grad_norm": 0.10374260693788528, "learning_rate": 1e-06, "loss": -0.0271, "step": 275 }, { "clip_ratio/high_max": 0.0033425524743506685, "clip_ratio/high_mean": 0.0013792945865134243, "clip_ratio/low_mean": 0.001423518766387133, "clip_ratio/low_min": 8.59089705045335e-05, "clip_ratio/region_mean": 0.002802813396556303, "epoch": 1.3919510061242344, "grad_norm": 0.11074087768793106, "learning_rate": 1e-06, "loss": 0.0088, "step": 276 }, { "clip_ratio/high_max": 0.0035491459566401318, "clip_ratio/high_mean": 0.0015569902971037664, "clip_ratio/low_mean": 0.0013285467903187964, "clip_ratio/low_min": 8.863146467774641e-05, "clip_ratio/region_mean": 0.0028855371347162873, "epoch": 1.3966170895304755, "grad_norm": 0.1087472066283226, "learning_rate": 1e-06, "loss": -0.0066, "step": 277 }, { "clip_ratio/high_max": 0.0035486265187500976, "clip_ratio/high_mean": 0.0014977077116782311, "clip_ratio/low_mean": 0.0015511686997342622, "clip_ratio/low_min": 0.0001948686931427801, "clip_ratio/region_mean": 0.0030488764023175463, "epoch": 1.4012831729367163, "grad_norm": 0.11436290293931961, "learning_rate": 1e-06, "loss": 0.0067, "step": 278 }, { "clip_ratio/high_max": 0.003244845043809619, "clip_ratio/high_mean": 0.001384616411087336, "clip_ratio/low_mean": 0.0014929241151548922, "clip_ratio/low_min": 0.00020924706313962815, "clip_ratio/region_mean": 0.0028775404862244613, "epoch": 1.4059492563429572, "grad_norm": 0.10665514320135117, "learning_rate": 1e-06, "loss": -0.0034, "step": 279 }, { "clip_ratio/high_max": 0.00314182773581706, "clip_ratio/high_mean": 0.0012556378642329946, "clip_ratio/low_mean": 0.0014716223122377414, "clip_ratio/low_min": 0.00011792662735388149, "clip_ratio/region_mean": 0.002727260216488503, "epoch": 1.410615339749198, "grad_norm": 0.11049500852823257, "learning_rate": 1e-06, "loss": 0.0452, "step": 280 }, { "clip_ratio/high_max": 0.003479857674392406, "clip_ratio/high_mean": 0.0015052516137075145, "clip_ratio/low_mean": 0.0014905521202308591, "clip_ratio/low_min": 8.315634659084026e-05, "clip_ratio/region_mean": 0.002995803763042204, "epoch": 1.415281423155439, "grad_norm": 0.11019416898488998, "learning_rate": 1e-06, "loss": -0.0343, "step": 281 }, { "clip_ratio/high_max": 0.002758573042228818, "clip_ratio/high_mean": 0.0012585490912897512, "clip_ratio/low_mean": 0.0018928376084659249, "clip_ratio/low_min": 0.00024258907433249988, "clip_ratio/region_mean": 0.0031513867143075913, "epoch": 1.4199475065616798, "grad_norm": 0.10782289505004883, "learning_rate": 1e-06, "loss": 0.0386, "step": 282 }, { "clip_ratio/high_max": 0.0035305990313645452, "clip_ratio/high_mean": 0.0013958701529190876, "clip_ratio/low_mean": 0.0015106644386833068, "clip_ratio/low_min": 0.00010343916801502928, "clip_ratio/region_mean": 0.0029065345879644156, "epoch": 1.4246135899679206, "grad_norm": 0.10185535997152328, "learning_rate": 1e-06, "loss": -0.0439, "step": 283 }, { "clip_ratio/high_max": 0.0036924673840985633, "clip_ratio/high_mean": 0.001454448327422142, "clip_ratio/low_mean": 0.0015676819421059918, "clip_ratio/low_min": 6.317899715213571e-05, "clip_ratio/region_mean": 0.003022130193130579, "epoch": 1.4292796733741615, "grad_norm": 0.1135973334312439, "learning_rate": 1e-06, "loss": -0.011, "step": 284 }, { "clip_ratio/high_max": 0.003138421798212221, "clip_ratio/high_mean": 0.001457390211726306, "clip_ratio/low_mean": 0.0014976706479501445, "clip_ratio/low_min": 7.617904338985682e-05, "clip_ratio/region_mean": 0.002955060816020705, "epoch": 1.4339457567804024, "grad_norm": 0.10585653781890869, "learning_rate": 1e-06, "loss": -0.0247, "step": 285 }, { "clip_ratio/high_max": 0.0034529809272498824, "clip_ratio/high_mean": 0.0014299384092737455, "clip_ratio/low_mean": 0.0019528168813849334, "clip_ratio/low_min": 0.00019594030891312286, "clip_ratio/region_mean": 0.0033827553124865517, "epoch": 1.4386118401866432, "grad_norm": 0.11233805119991302, "learning_rate": 1e-06, "loss": 0.0234, "step": 286 }, { "clip_ratio/high_max": 0.003996423431090079, "clip_ratio/high_mean": 0.0014924178685760126, "clip_ratio/low_mean": 0.0016111696968437172, "clip_ratio/low_min": 9.366350059281103e-05, "clip_ratio/region_mean": 0.0031035875290399417, "epoch": 1.443277923592884, "grad_norm": 0.10819321870803833, "learning_rate": 1e-06, "loss": -0.0099, "step": 287 }, { "clip_ratio/high_max": 0.003536757591064088, "clip_ratio/high_mean": 0.0015316535609599669, "clip_ratio/low_mean": 0.00167769544350449, "clip_ratio/low_min": 0.0001826616799007752, "clip_ratio/region_mean": 0.0032093490153783932, "epoch": 1.4479440069991252, "grad_norm": 0.10967979580163956, "learning_rate": 1e-06, "loss": -0.0095, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026576450892857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 666.6900634765625, "completions/mean_terminated_length": 573.0628662109375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 1.452610090405366, "grad_norm": 0.12555056810379028, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 93001900.0, "reward": 0.5763811469078064, "reward_std": 0.19148065149784088, "rewards/simpleverify_reward/mean": 0.5763811469078064, "rewards/simpleverify_reward/std": 0.49414870142936707, "step": 289 }, { "clip_ratio/high_max": 0.002160648495191708, "clip_ratio/high_mean": 0.0008633962243038695, "clip_ratio/low_mean": 0.0005861948720848886, "clip_ratio/low_min": 1.3397641851042863e-05, "clip_ratio/region_mean": 0.0014495910872938111, "epoch": 1.457276173811607, "grad_norm": 0.116352379322052, "learning_rate": 1e-06, "loss": 0.0111, "step": 290 }, { "clip_ratio/high_max": 0.0022229286696529016, "clip_ratio/high_mean": 0.0009015193463710602, "clip_ratio/low_mean": 0.00047310482113971375, "clip_ratio/low_min": 3.0259212508099154e-05, "clip_ratio/region_mean": 0.0013746241456829011, "epoch": 1.4619422572178478, "grad_norm": 0.119874507188797, "learning_rate": 1e-06, "loss": 0.0023, "step": 291 }, { "clip_ratio/high_max": 0.002048235764959827, "clip_ratio/high_mean": 0.000786712755143526, "clip_ratio/low_mean": 0.0006537581693919492, "clip_ratio/low_min": 4.4827045712736435e-05, "clip_ratio/region_mean": 0.0014404709399968851, "epoch": 1.4666083406240886, "grad_norm": 0.12702429294586182, "learning_rate": 1e-06, "loss": 0.0025, "step": 292 }, { "clip_ratio/high_max": 0.0021407743806776125, "clip_ratio/high_mean": 0.0008375630550290225, "clip_ratio/low_mean": 0.0006024478880135575, "clip_ratio/low_min": 1.315789450018201e-05, "clip_ratio/region_mean": 0.0014400109103007708, "epoch": 1.4712744240303295, "grad_norm": 0.12075016647577286, "learning_rate": 1e-06, "loss": 0.0081, "step": 293 }, { "clip_ratio/high_max": 0.002063258543785196, "clip_ratio/high_mean": 0.0008076634185272269, "clip_ratio/low_mean": 0.0005785654211649671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013862288360542152, "epoch": 1.4759405074365703, "grad_norm": 0.12994447350502014, "learning_rate": 1e-06, "loss": -0.0123, "step": 294 }, { "clip_ratio/high_max": 0.001978936474188231, "clip_ratio/high_mean": 0.0008879185297701042, "clip_ratio/low_mean": 0.0006442111534852302, "clip_ratio/low_min": 5.831062844663393e-05, "clip_ratio/region_mean": 0.0015321296777983662, "epoch": 1.4806065908428114, "grad_norm": 0.1255219429731369, "learning_rate": 1e-06, "loss": -0.0137, "step": 295 }, { "clip_ratio/high_max": 0.0021538263717957307, "clip_ratio/high_mean": 0.0009058354498847621, "clip_ratio/low_mean": 0.0007116124215826858, "clip_ratio/low_min": 3.09917340928223e-05, "clip_ratio/region_mean": 0.0016174478550965432, "epoch": 1.4852726742490523, "grad_norm": 0.13035698235034943, "learning_rate": 1e-06, "loss": -0.0243, "step": 296 }, { "clip_ratio/high_max": 0.0019168691142112948, "clip_ratio/high_mean": 0.0008599165921623353, "clip_ratio/low_mean": 0.0007308219292099238, "clip_ratio/low_min": 5.262999366095755e-05, "clip_ratio/region_mean": 0.0015907385241007432, "epoch": 1.4899387576552932, "grad_norm": 0.1258716583251953, "learning_rate": 1e-06, "loss": -0.015, "step": 297 }, { "clip_ratio/high_max": 0.002627965142892208, "clip_ratio/high_mean": 0.0010332556594221387, "clip_ratio/low_mean": 0.0008474831356579671, "clip_ratio/low_min": 4.2870276956819e-05, "clip_ratio/region_mean": 0.0018807388260029256, "epoch": 1.494604841061534, "grad_norm": 0.1218964010477066, "learning_rate": 1e-06, "loss": -0.0256, "step": 298 }, { "clip_ratio/high_max": 0.0021075557669973932, "clip_ratio/high_mean": 0.000935405092604924, "clip_ratio/low_mean": 0.0008960193354141666, "clip_ratio/low_min": 6.579255659744376e-05, "clip_ratio/region_mean": 0.0018314243934582919, "epoch": 1.4992709244677749, "grad_norm": 0.13343334197998047, "learning_rate": 1e-06, "loss": -0.0053, "step": 299 }, { "clip_ratio/high_max": 0.002547697236877866, "clip_ratio/high_mean": 0.0009431301823497051, "clip_ratio/low_mean": 0.0009233878772647586, "clip_ratio/low_min": 6.67174444970442e-05, "clip_ratio/region_mean": 0.0018665180905372836, "epoch": 1.5039370078740157, "grad_norm": 0.12178276479244232, "learning_rate": 1e-06, "loss": 0.0207, "step": 300 }, { "clip_ratio/high_max": 0.002082353639707435, "clip_ratio/high_mean": 0.000841930006572511, "clip_ratio/low_mean": 0.0009710317517601652, "clip_ratio/low_min": 7.222014937724452e-05, "clip_ratio/region_mean": 0.0018129617819795385, "epoch": 1.5086030912802566, "grad_norm": 0.11422529816627502, "learning_rate": 1e-06, "loss": 0.0267, "step": 301 }, { "clip_ratio/high_max": 0.0023214292523334734, "clip_ratio/high_mean": 0.0009400309099873994, "clip_ratio/low_mean": 0.0008903673624445219, "clip_ratio/low_min": 3.713310979946982e-05, "clip_ratio/region_mean": 0.0018303982651559636, "epoch": 1.5132691746864975, "grad_norm": 0.1316445916891098, "learning_rate": 1e-06, "loss": -0.0185, "step": 302 }, { "clip_ratio/high_max": 0.0022857034055050462, "clip_ratio/high_mean": 0.0009954730066965567, "clip_ratio/low_mean": 0.0009809096591197886, "clip_ratio/low_min": 2.8668434424616862e-05, "clip_ratio/region_mean": 0.0019763826931011863, "epoch": 1.5179352580927383, "grad_norm": 0.13324519991874695, "learning_rate": 1e-06, "loss": 0.0231, "step": 303 }, { "clip_ratio/high_max": 0.002196057641413063, "clip_ratio/high_mean": 0.0010242104908684269, "clip_ratio/low_mean": 0.0010243193200949463, "clip_ratio/low_min": 0.0001708466952550225, "clip_ratio/region_mean": 0.002048529793682974, "epoch": 1.5226013414989792, "grad_norm": 0.11277655512094498, "learning_rate": 1e-06, "loss": 0.02, "step": 304 }, { "clip_ratio/high_max": 0.003713684949616436, "clip_ratio/high_mean": 0.0015837133105378598, "clip_ratio/low_mean": 0.0014507636769849341, "clip_ratio/low_min": 0.00016174334996321704, "clip_ratio/region_mean": 0.003034477005712688, "epoch": 1.52726742490522, "grad_norm": 0.10689415782690048, "learning_rate": 1e-06, "loss": -0.0123, "step": 305 }, { "clip_ratio/high_max": 0.00324887359602144, "clip_ratio/high_mean": 0.0013520644970412832, "clip_ratio/low_mean": 0.0014389029602170922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002790967482724227, "epoch": 1.531933508311461, "grad_norm": 0.10016504675149918, "learning_rate": 1e-06, "loss": 0.0105, "step": 306 }, { "clip_ratio/high_max": 0.0031683656634413637, "clip_ratio/high_mean": 0.001272190758754732, "clip_ratio/low_mean": 0.0013830785392201506, "clip_ratio/low_min": 0.00011225256639590953, "clip_ratio/region_mean": 0.0026552692579571158, "epoch": 1.536599591717702, "grad_norm": 0.10379119217395782, "learning_rate": 1e-06, "loss": 0.0017, "step": 307 }, { "clip_ratio/high_max": 0.003237110322515946, "clip_ratio/high_mean": 0.0012915346887893975, "clip_ratio/low_mean": 0.00157935261449893, "clip_ratio/low_min": 0.0002178343311243225, "clip_ratio/region_mean": 0.002870887314202264, "epoch": 1.5412656751239429, "grad_norm": 0.11277302354574203, "learning_rate": 1e-06, "loss": 0.0018, "step": 308 }, { "clip_ratio/high_max": 0.0034774578671203926, "clip_ratio/high_mean": 0.0013901542624807917, "clip_ratio/low_mean": 0.0014009257902216632, "clip_ratio/low_min": 0.00012018540292046964, "clip_ratio/region_mean": 0.002791080085444264, "epoch": 1.5459317585301837, "grad_norm": 0.10168322175741196, "learning_rate": 1e-06, "loss": 0.0074, "step": 309 }, { "clip_ratio/high_max": 0.003713514954142738, "clip_ratio/high_mean": 0.0014563254699169192, "clip_ratio/low_mean": 0.0015838820581848267, "clip_ratio/low_min": 0.00016683366720826598, "clip_ratio/region_mean": 0.0030402075717574917, "epoch": 1.5505978419364246, "grad_norm": 0.10710260272026062, "learning_rate": 1e-06, "loss": -0.013, "step": 310 }, { "clip_ratio/high_max": 0.003839484168565832, "clip_ratio/high_mean": 0.0015137174414121546, "clip_ratio/low_mean": 0.0014364117378136143, "clip_ratio/low_min": 0.00019619084559963085, "clip_ratio/region_mean": 0.002950129215605557, "epoch": 1.5552639253426657, "grad_norm": 0.11161097139120102, "learning_rate": 1e-06, "loss": -0.0144, "step": 311 }, { "clip_ratio/high_max": 0.0034504338836995885, "clip_ratio/high_mean": 0.0015048963214212563, "clip_ratio/low_mean": 0.0013769024881185032, "clip_ratio/low_min": 7.843084495107178e-05, "clip_ratio/region_mean": 0.002881798856833484, "epoch": 1.5599300087489065, "grad_norm": 0.11442887783050537, "learning_rate": 1e-06, "loss": -0.025, "step": 312 }, { "clip_ratio/high_max": 0.003165156696923077, "clip_ratio/high_mean": 0.0015039196150610223, "clip_ratio/low_mean": 0.0015210767869575648, "clip_ratio/low_min": 0.00011679028193611884, "clip_ratio/region_mean": 0.003024996360181831, "epoch": 1.5645960921551474, "grad_norm": 0.11068805307149887, "learning_rate": 1e-06, "loss": -0.0157, "step": 313 }, { "clip_ratio/high_max": 0.004089017282240093, "clip_ratio/high_mean": 0.0017181080402224325, "clip_ratio/low_mean": 0.0015445030985574704, "clip_ratio/low_min": 8.78369937709067e-05, "clip_ratio/region_mean": 0.0032626111496938393, "epoch": 1.5692621755613883, "grad_norm": 0.10936661064624786, "learning_rate": 1e-06, "loss": -0.0263, "step": 314 }, { "clip_ratio/high_max": 0.003602085984311998, "clip_ratio/high_mean": 0.00157911660062382, "clip_ratio/low_mean": 0.0016133072713273577, "clip_ratio/low_min": 0.00020800574202439748, "clip_ratio/region_mean": 0.0031924238719511777, "epoch": 1.5739282589676291, "grad_norm": 0.11808856576681137, "learning_rate": 1e-06, "loss": -0.006, "step": 315 }, { "clip_ratio/high_max": 0.0038861082066432573, "clip_ratio/high_mean": 0.001470939714636188, "clip_ratio/low_mean": 0.0016469136389787309, "clip_ratio/low_min": 0.0001798182347556576, "clip_ratio/region_mean": 0.00311785341182258, "epoch": 1.57859434237387, "grad_norm": 0.10651401430368423, "learning_rate": 1e-06, "loss": 0.0199, "step": 316 }, { "clip_ratio/high_max": 0.002934512034698855, "clip_ratio/high_mean": 0.0013017165329074487, "clip_ratio/low_mean": 0.0015416514142998494, "clip_ratio/low_min": 0.0001656166659813607, "clip_ratio/region_mean": 0.002843367896275595, "epoch": 1.5832604257801108, "grad_norm": 0.10243922472000122, "learning_rate": 1e-06, "loss": 0.0261, "step": 317 }, { "clip_ratio/high_max": 0.003942028328310698, "clip_ratio/high_mean": 0.0016009552891773637, "clip_ratio/low_mean": 0.0016188754889299162, "clip_ratio/low_min": 7.892062967584934e-05, "clip_ratio/region_mean": 0.003219830774469301, "epoch": 1.5879265091863517, "grad_norm": 0.11441457271575928, "learning_rate": 1e-06, "loss": -0.0193, "step": 318 }, { "clip_ratio/high_max": 0.0030020347621757537, "clip_ratio/high_mean": 0.0014899942070769612, "clip_ratio/low_mean": 0.0018149302995880134, "clip_ratio/low_min": 7.857345190132037e-05, "clip_ratio/region_mean": 0.003304924597614445, "epoch": 1.5925925925925926, "grad_norm": 0.11282162368297577, "learning_rate": 1e-06, "loss": 0.0223, "step": 319 }, { "clip_ratio/high_max": 0.0033832769258879125, "clip_ratio/high_mean": 0.001501935545093147, "clip_ratio/low_mean": 0.0015683064757467946, "clip_ratio/low_min": 0.0002546941541368142, "clip_ratio/region_mean": 0.003070241968089249, "epoch": 1.5972586759988334, "grad_norm": 0.10051309317350388, "learning_rate": 1e-06, "loss": 0.0194, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0239955357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 651.27587890625, "completions/mean_terminated_length": 566.585693359375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.6019247594050743, "grad_norm": 0.13548938930034637, "learning_rate": 1e-06, "loss": -0.053, "num_tokens": 102375263.0, "reward": 0.59521484375, "reward_std": 0.18863287568092346, "rewards/simpleverify_reward/mean": 0.59521484375, "rewards/simpleverify_reward/std": 0.4908675253391266, "step": 321 }, { "clip_ratio/high_max": 0.0019266981180408038, "clip_ratio/high_mean": 0.0008372876018256648, "clip_ratio/low_mean": 0.0005748561143263942, "clip_ratio/low_min": 2.7478713491291273e-05, "clip_ratio/region_mean": 0.0014121436652203556, "epoch": 1.6065908428113151, "grad_norm": 0.11016207933425903, "learning_rate": 1e-06, "loss": 0.0208, "step": 322 }, { "clip_ratio/high_max": 0.001917662113555707, "clip_ratio/high_mean": 0.0008604406539234333, "clip_ratio/low_mean": 0.0006482774142568815, "clip_ratio/low_min": 2.8280543119763024e-05, "clip_ratio/region_mean": 0.0015087180436239578, "epoch": 1.611256926217556, "grad_norm": 0.12021241337060928, "learning_rate": 1e-06, "loss": -0.0052, "step": 323 }, { "clip_ratio/high_max": 0.002288687217514962, "clip_ratio/high_mean": 0.0009081361768039642, "clip_ratio/low_mean": 0.0006733211685059359, "clip_ratio/low_min": 1.5439723938470706e-05, "clip_ratio/region_mean": 0.0015814573634997942, "epoch": 1.6159230096237969, "grad_norm": 0.1373334527015686, "learning_rate": 1e-06, "loss": 0.0179, "step": 324 }, { "clip_ratio/high_max": 0.0020306098740547895, "clip_ratio/high_mean": 0.000874390661920188, "clip_ratio/low_mean": 0.0006464388952736044, "clip_ratio/low_min": 2.6329883439757396e-05, "clip_ratio/region_mean": 0.0015208295590127818, "epoch": 1.620589093030038, "grad_norm": 0.12998969852924347, "learning_rate": 1e-06, "loss": 0.0039, "step": 325 }, { "clip_ratio/high_max": 0.0019063415165874176, "clip_ratio/high_mean": 0.000806250416644616, "clip_ratio/low_mean": 0.0007089508962963009, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015152013183978852, "epoch": 1.6252551764362788, "grad_norm": 0.11522424966096878, "learning_rate": 1e-06, "loss": -0.0083, "step": 326 }, { "clip_ratio/high_max": 0.002445299323881045, "clip_ratio/high_mean": 0.0009924290789058432, "clip_ratio/low_mean": 0.0007170979333750438, "clip_ratio/low_min": 1.678979242569767e-05, "clip_ratio/region_mean": 0.0017095269868150353, "epoch": 1.6299212598425197, "grad_norm": 0.12448902428150177, "learning_rate": 1e-06, "loss": -0.0258, "step": 327 }, { "clip_ratio/high_max": 0.0019845134956995025, "clip_ratio/high_mean": 0.0008472294539387804, "clip_ratio/low_mean": 0.0008545283617422683, "clip_ratio/low_min": 4.0416341107629705e-05, "clip_ratio/region_mean": 0.001701757857517805, "epoch": 1.6345873432487605, "grad_norm": 0.1182471439242363, "learning_rate": 1e-06, "loss": 0.0429, "step": 328 }, { "clip_ratio/high_max": 0.002270363645948237, "clip_ratio/high_mean": 0.001017747759760823, "clip_ratio/low_mean": 0.0008699028749106219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001887650738353841, "epoch": 1.6392534266550016, "grad_norm": 0.1319698691368103, "learning_rate": 1e-06, "loss": 0.0047, "step": 329 }, { "clip_ratio/high_max": 0.0022671481056022458, "clip_ratio/high_mean": 0.0009886706284305546, "clip_ratio/low_mean": 0.0010321784575353377, "clip_ratio/low_min": 4.962312050338369e-05, "clip_ratio/region_mean": 0.0020208490968798287, "epoch": 1.6439195100612425, "grad_norm": 0.12157025188207626, "learning_rate": 1e-06, "loss": 0.0393, "step": 330 }, { "clip_ratio/high_max": 0.0026783751309267245, "clip_ratio/high_mean": 0.0010065884653158719, "clip_ratio/low_mean": 0.0008635749181848951, "clip_ratio/low_min": 6.690670670650434e-05, "clip_ratio/region_mean": 0.0018701634035096504, "epoch": 1.6485855934674833, "grad_norm": 0.11779691278934479, "learning_rate": 1e-06, "loss": -0.0096, "step": 331 }, { "clip_ratio/high_max": 0.002075159442028962, "clip_ratio/high_mean": 0.0009364311827084748, "clip_ratio/low_mean": 0.0010303963372280123, "clip_ratio/low_min": 5.960333510302007e-05, "clip_ratio/region_mean": 0.0019668275272124447, "epoch": 1.6532516768737242, "grad_norm": 0.1396941840648651, "learning_rate": 1e-06, "loss": -0.0202, "step": 332 }, { "clip_ratio/high_max": 0.0020541640915325843, "clip_ratio/high_mean": 0.0009431538928765804, "clip_ratio/low_mean": 0.0010075270402012393, "clip_ratio/low_min": 6.621025659114821e-05, "clip_ratio/region_mean": 0.001950680925801862, "epoch": 1.657917760279965, "grad_norm": 0.12700136005878448, "learning_rate": 1e-06, "loss": 0.0427, "step": 333 }, { "clip_ratio/high_max": 0.0023619765124749392, "clip_ratio/high_mean": 0.0010166736119572306, "clip_ratio/low_mean": 0.0008600363744335482, "clip_ratio/low_min": 6.924876288394444e-05, "clip_ratio/region_mean": 0.0018767099754768424, "epoch": 1.662583843686206, "grad_norm": 0.11084874719381332, "learning_rate": 1e-06, "loss": -0.0348, "step": 334 }, { "clip_ratio/high_max": 0.0026726905489340425, "clip_ratio/high_mean": 0.0010358676845498849, "clip_ratio/low_mean": 0.001002803946903441, "clip_ratio/low_min": 6.115995711297728e-05, "clip_ratio/region_mean": 0.0020386716059874743, "epoch": 1.6672499270924468, "grad_norm": 0.13239501416683197, "learning_rate": 1e-06, "loss": 0.0058, "step": 335 }, { "clip_ratio/high_max": 0.0021409119690360967, "clip_ratio/high_mean": 0.0009473096197325503, "clip_ratio/low_mean": 0.0007952069536258932, "clip_ratio/low_min": 6.538748129969463e-05, "clip_ratio/region_mean": 0.0017425165788154118, "epoch": 1.6719160104986877, "grad_norm": 0.11610430479049683, "learning_rate": 1e-06, "loss": -0.0289, "step": 336 }, { "clip_ratio/high_max": 0.0042418408338562585, "clip_ratio/high_mean": 0.0017963969439733773, "clip_ratio/low_mean": 0.0012557707959786057, "clip_ratio/low_min": 4.86381322843954e-05, "clip_ratio/region_mean": 0.0030521677326760255, "epoch": 1.6765820939049285, "grad_norm": 0.1103362962603569, "learning_rate": 1e-06, "loss": -0.0538, "step": 337 }, { "clip_ratio/high_max": 0.0030311374212033115, "clip_ratio/high_mean": 0.0012728021392831579, "clip_ratio/low_mean": 0.0012046853316860506, "clip_ratio/low_min": 5.457049519463908e-05, "clip_ratio/region_mean": 0.0024774875419097953, "epoch": 1.6812481773111694, "grad_norm": 0.10016685724258423, "learning_rate": 1e-06, "loss": 0.0202, "step": 338 }, { "clip_ratio/high_max": 0.003286378865595907, "clip_ratio/high_mean": 0.0014490114263026044, "clip_ratio/low_mean": 0.0013372733064898057, "clip_ratio/low_min": 2.015478821704164e-05, "clip_ratio/region_mean": 0.00278628476371523, "epoch": 1.6859142607174102, "grad_norm": 0.10625132918357849, "learning_rate": 1e-06, "loss": -0.0059, "step": 339 }, { "clip_ratio/high_max": 0.003658809815533459, "clip_ratio/high_mean": 0.0015765216412546579, "clip_ratio/low_mean": 0.0015578031161567196, "clip_ratio/low_min": 9.62185840762686e-05, "clip_ratio/region_mean": 0.003134324782877229, "epoch": 1.690580344123651, "grad_norm": 0.11801841109991074, "learning_rate": 1e-06, "loss": 0.0171, "step": 340 }, { "clip_ratio/high_max": 0.0037345462405937724, "clip_ratio/high_mean": 0.0014937598061806057, "clip_ratio/low_mean": 0.0013748585370194633, "clip_ratio/low_min": 0.0001134939411713276, "clip_ratio/region_mean": 0.0028686183504760265, "epoch": 1.695246427529892, "grad_norm": 0.11255044490098953, "learning_rate": 1e-06, "loss": 0.0032, "step": 341 }, { "clip_ratio/high_max": 0.0032759173554950394, "clip_ratio/high_mean": 0.001367050999760977, "clip_ratio/low_mean": 0.0015151435254665557, "clip_ratio/low_min": 0.00010566521359578473, "clip_ratio/region_mean": 0.002882194494304713, "epoch": 1.6999125109361328, "grad_norm": 0.10701259225606918, "learning_rate": 1e-06, "loss": -0.0089, "step": 342 }, { "clip_ratio/high_max": 0.0035532793699530885, "clip_ratio/high_mean": 0.001531706984678749, "clip_ratio/low_mean": 0.0013342510792426765, "clip_ratio/low_min": 7.329217260121368e-05, "clip_ratio/region_mean": 0.002865957976609934, "epoch": 1.704578594342374, "grad_norm": 0.1093229353427887, "learning_rate": 1e-06, "loss": -0.0264, "step": 343 }, { "clip_ratio/high_max": 0.0030399296010727994, "clip_ratio/high_mean": 0.0013797879546473268, "clip_ratio/low_mean": 0.0016490490961587057, "clip_ratio/low_min": 0.00018255741997563746, "clip_ratio/region_mean": 0.0030288370471680537, "epoch": 1.7092446777486148, "grad_norm": 0.10501661896705627, "learning_rate": 1e-06, "loss": 0.0422, "step": 344 }, { "clip_ratio/high_max": 0.004481605261389632, "clip_ratio/high_mean": 0.0016860480900504626, "clip_ratio/low_mean": 0.0015173658430285286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00320341393671697, "epoch": 1.7139107611548556, "grad_norm": 0.113813616335392, "learning_rate": 1e-06, "loss": 0.0039, "step": 345 }, { "clip_ratio/high_max": 0.0035676207771757618, "clip_ratio/high_mean": 0.0014478334014711436, "clip_ratio/low_mean": 0.0016892831117729656, "clip_ratio/low_min": 2.860411950678099e-05, "clip_ratio/region_mean": 0.003137116495054215, "epoch": 1.7185768445610965, "grad_norm": 0.10499369353055954, "learning_rate": 1e-06, "loss": 0.0386, "step": 346 }, { "clip_ratio/high_max": 0.004160481141298078, "clip_ratio/high_mean": 0.0015307986832340248, "clip_ratio/low_mean": 0.0013833694138156716, "clip_ratio/low_min": 0.0001095561747206375, "clip_ratio/region_mean": 0.002914168122515548, "epoch": 1.7232429279673376, "grad_norm": 0.10324528068304062, "learning_rate": 1e-06, "loss": -0.0102, "step": 347 }, { "clip_ratio/high_max": 0.0032277135105687194, "clip_ratio/high_mean": 0.0014614682186220307, "clip_ratio/low_mean": 0.001711125503788935, "clip_ratio/low_min": 0.0001377127618980012, "clip_ratio/region_mean": 0.0031725938024464995, "epoch": 1.7279090113735784, "grad_norm": 0.12369127571582794, "learning_rate": 1e-06, "loss": -0.0211, "step": 348 }, { "clip_ratio/high_max": 0.003204410008038394, "clip_ratio/high_mean": 0.001322816104220692, "clip_ratio/low_mean": 0.001885768029751489, "clip_ratio/low_min": 0.00017562640277901664, "clip_ratio/region_mean": 0.003208584093954414, "epoch": 1.7325750947798193, "grad_norm": 0.11398528516292572, "learning_rate": 1e-06, "loss": 0.042, "step": 349 }, { "clip_ratio/high_max": 0.0035515979543561116, "clip_ratio/high_mean": 0.0014988111070124432, "clip_ratio/low_mean": 0.0013470172561937943, "clip_ratio/low_min": 0.0001781159771780949, "clip_ratio/region_mean": 0.002845828392310068, "epoch": 1.7372411781860602, "grad_norm": 0.09687419980764389, "learning_rate": 1e-06, "loss": -0.0354, "step": 350 }, { "clip_ratio/high_max": 0.003829060879070312, "clip_ratio/high_mean": 0.0015959006650518859, "clip_ratio/low_mean": 0.0016343101306119934, "clip_ratio/low_min": 0.0002144947229680838, "clip_ratio/region_mean": 0.0032302107429131866, "epoch": 1.741907261592301, "grad_norm": 0.11488457024097443, "learning_rate": 1e-06, "loss": 0.005, "step": 351 }, { "clip_ratio/high_max": 0.003590352505852934, "clip_ratio/high_mean": 0.0015548110677627847, "clip_ratio/low_mean": 0.0013329005905688973, "clip_ratio/low_min": 0.0001250026216439437, "clip_ratio/region_mean": 0.002887711612856947, "epoch": 1.7465733449985419, "grad_norm": 0.10290053486824036, "learning_rate": 1e-06, "loss": -0.0296, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030622209821428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 682.173583984375, "completions/mean_terminated_length": 574.332275390625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.7512394284047827, "grad_norm": 0.12827764451503754, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 111769247.0, "reward": 0.5813337564468384, "reward_std": 0.19285298883914948, "rewards/simpleverify_reward/mean": 0.5813336968421936, "rewards/simpleverify_reward/std": 0.49335768818855286, "step": 353 }, { "clip_ratio/high_max": 0.0020785781234735623, "clip_ratio/high_mean": 0.0008503277485942817, "clip_ratio/low_mean": 0.000629880663836957, "clip_ratio/low_min": 3.462603854131885e-05, "clip_ratio/region_mean": 0.0014802084151597228, "epoch": 1.7559055118110236, "grad_norm": 0.1269812434911728, "learning_rate": 1e-06, "loss": -0.0128, "step": 354 }, { "clip_ratio/high_max": 0.0023443043100996874, "clip_ratio/high_mean": 0.0009015752002596855, "clip_ratio/low_mean": 0.000669885932438774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015714611363364384, "epoch": 1.7605715952172645, "grad_norm": 0.12718863785266876, "learning_rate": 1e-06, "loss": -0.0011, "step": 355 }, { "clip_ratio/high_max": 0.0019307877255414496, "clip_ratio/high_mean": 0.0008263811923825415, "clip_ratio/low_mean": 0.000640767248114571, "clip_ratio/low_min": 8.741689634916838e-05, "clip_ratio/region_mean": 0.0014671484423161019, "epoch": 1.7652376786235053, "grad_norm": 0.15040023624897003, "learning_rate": 1e-06, "loss": 0.0313, "step": 356 }, { "clip_ratio/high_max": 0.0021755788366135675, "clip_ratio/high_mean": 0.0008778421488386812, "clip_ratio/low_mean": 0.0006345813553707558, "clip_ratio/low_min": 2.4923860109993257e-05, "clip_ratio/region_mean": 0.001512423506937921, "epoch": 1.7699037620297462, "grad_norm": 0.13119052350521088, "learning_rate": 1e-06, "loss": -0.0003, "step": 357 }, { "clip_ratio/high_max": 0.0018839223594113719, "clip_ratio/high_mean": 0.0007673159125261009, "clip_ratio/low_mean": 0.000752088275476126, "clip_ratio/low_min": 8.605176117271185e-05, "clip_ratio/region_mean": 0.001519404184364248, "epoch": 1.774569845435987, "grad_norm": 0.11358306556940079, "learning_rate": 1e-06, "loss": 0.0126, "step": 358 }, { "clip_ratio/high_max": 0.002341660609090468, "clip_ratio/high_mean": 0.0010169257893721806, "clip_ratio/low_mean": 0.0007849286957934964, "clip_ratio/low_min": 1.6715699530323036e-05, "clip_ratio/region_mean": 0.0018018544724327512, "epoch": 1.779235928842228, "grad_norm": 0.127577543258667, "learning_rate": 1e-06, "loss": -0.0076, "step": 359 }, { "clip_ratio/high_max": 0.002158691844670102, "clip_ratio/high_mean": 0.0010219817959296051, "clip_ratio/low_mean": 0.0007234496533783386, "clip_ratio/low_min": 7.750570421194425e-05, "clip_ratio/region_mean": 0.0017454314074711874, "epoch": 1.7839020122484688, "grad_norm": 0.13786529004573822, "learning_rate": 1e-06, "loss": -0.0454, "step": 360 }, { "clip_ratio/high_max": 0.0020796127355424687, "clip_ratio/high_mean": 0.0008258370071416721, "clip_ratio/low_mean": 0.00102595693715557, "clip_ratio/low_min": 5.177436196390772e-05, "clip_ratio/region_mean": 0.0018517939024604857, "epoch": 1.7885680956547099, "grad_norm": 0.12394144386053085, "learning_rate": 1e-06, "loss": 0.0436, "step": 361 }, { "clip_ratio/high_max": 0.002249546305392869, "clip_ratio/high_mean": 0.0010305317664460745, "clip_ratio/low_mean": 0.00095282848997158, "clip_ratio/low_min": 0.00011355648985045264, "clip_ratio/region_mean": 0.001983360263693612, "epoch": 1.7932341790609507, "grad_norm": 0.13357269763946533, "learning_rate": 1e-06, "loss": -0.006, "step": 362 }, { "clip_ratio/high_max": 0.0024230120252468623, "clip_ratio/high_mean": 0.0009460509118071059, "clip_ratio/low_mean": 0.0008383164094993845, "clip_ratio/low_min": 4.249613903084537e-05, "clip_ratio/region_mean": 0.0017843673049355857, "epoch": 1.7979002624671916, "grad_norm": 0.12783877551555634, "learning_rate": 1e-06, "loss": -0.0056, "step": 363 }, { "clip_ratio/high_max": 0.0024057246118900366, "clip_ratio/high_mean": 0.0010915157181443647, "clip_ratio/low_mean": 0.0009038983935170108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001995414146222174, "epoch": 1.8025663458734325, "grad_norm": 0.12972626090049744, "learning_rate": 1e-06, "loss": 0.0025, "step": 364 }, { "clip_ratio/high_max": 0.002393270769971423, "clip_ratio/high_mean": 0.0011562036888790317, "clip_ratio/low_mean": 0.0008253280907410954, "clip_ratio/low_min": 7.776207621645881e-05, "clip_ratio/region_mean": 0.0019815317864413373, "epoch": 1.8072324292796735, "grad_norm": 0.12222950160503387, "learning_rate": 1e-06, "loss": -0.0397, "step": 365 }, { "clip_ratio/high_max": 0.0026781432788993698, "clip_ratio/high_mean": 0.0011825664332718588, "clip_ratio/low_mean": 0.0006742381956428289, "clip_ratio/low_min": 1.6887328456505202e-05, "clip_ratio/region_mean": 0.0018568046471045818, "epoch": 1.8118985126859144, "grad_norm": 0.12620043754577637, "learning_rate": 1e-06, "loss": -0.063, "step": 366 }, { "clip_ratio/high_max": 0.002211867700680159, "clip_ratio/high_mean": 0.0008838869034661911, "clip_ratio/low_mean": 0.0009511059870419558, "clip_ratio/low_min": 6.131609370640945e-05, "clip_ratio/region_mean": 0.0018349928941461258, "epoch": 1.8165645960921553, "grad_norm": 0.11981648206710815, "learning_rate": 1e-06, "loss": 0.0162, "step": 367 }, { "clip_ratio/high_max": 0.0022732884463039227, "clip_ratio/high_mean": 0.000955751740548294, "clip_ratio/low_mean": 0.0010068793890241068, "clip_ratio/low_min": 6.87444771756418e-05, "clip_ratio/region_mean": 0.001962631147762295, "epoch": 1.8212306794983961, "grad_norm": 0.13691291213035583, "learning_rate": 1e-06, "loss": 0.0307, "step": 368 }, { "clip_ratio/high_max": 0.0033569572915439494, "clip_ratio/high_mean": 0.0015392535206046887, "clip_ratio/low_mean": 0.0015115887472347822, "clip_ratio/low_min": 8.52354169182945e-05, "clip_ratio/region_mean": 0.003050842191441916, "epoch": 1.825896762904637, "grad_norm": 0.10022418200969696, "learning_rate": 1e-06, "loss": 0.0023, "step": 369 }, { "clip_ratio/high_max": 0.004083825915586203, "clip_ratio/high_mean": 0.0015683019264542963, "clip_ratio/low_mean": 0.001630250411835732, "clip_ratio/low_min": 0.00016472469724249095, "clip_ratio/region_mean": 0.0031985523310140707, "epoch": 1.8305628463108778, "grad_norm": 0.11440886557102203, "learning_rate": 1e-06, "loss": -0.0136, "step": 370 }, { "clip_ratio/high_max": 0.0034999630006495863, "clip_ratio/high_mean": 0.0014020929484104272, "clip_ratio/low_mean": 0.0016902677052712534, "clip_ratio/low_min": 0.00014178428682498634, "clip_ratio/region_mean": 0.0030923606973374262, "epoch": 1.8352289297171187, "grad_norm": 0.11437682807445526, "learning_rate": 1e-06, "loss": -0.0018, "step": 371 }, { "clip_ratio/high_max": 0.0035921438175137155, "clip_ratio/high_mean": 0.0014920344583515543, "clip_ratio/low_mean": 0.001933707964781206, "clip_ratio/low_min": 0.0002751626125245821, "clip_ratio/region_mean": 0.0034257424267707393, "epoch": 1.8398950131233596, "grad_norm": 0.11441878974437714, "learning_rate": 1e-06, "loss": 0.0305, "step": 372 }, { "clip_ratio/high_max": 0.004091057162440848, "clip_ratio/high_mean": 0.0015690306317992508, "clip_ratio/low_mean": 0.0017073304352379637, "clip_ratio/low_min": 0.00025986087166529614, "clip_ratio/region_mean": 0.0032763610943220556, "epoch": 1.8445610965296004, "grad_norm": 0.11429788917303085, "learning_rate": 1e-06, "loss": -0.0012, "step": 373 }, { "clip_ratio/high_max": 0.003304840764030814, "clip_ratio/high_mean": 0.001242177508174791, "clip_ratio/low_mean": 0.001681275425653439, "clip_ratio/low_min": 0.00023983259234228171, "clip_ratio/region_mean": 0.0029234528919914737, "epoch": 1.8492271799358413, "grad_norm": 0.10144300013780594, "learning_rate": 1e-06, "loss": 0.012, "step": 374 }, { "clip_ratio/high_max": 0.003399322580662556, "clip_ratio/high_mean": 0.0015267052403942216, "clip_ratio/low_mean": 0.0016492387585458346, "clip_ratio/low_min": 7.255605669342913e-05, "clip_ratio/region_mean": 0.003175943944370374, "epoch": 1.8538932633420822, "grad_norm": 0.11459530889987946, "learning_rate": 1e-06, "loss": -0.0083, "step": 375 }, { "clip_ratio/high_max": 0.004202735843136907, "clip_ratio/high_mean": 0.0017986674974963535, "clip_ratio/low_mean": 0.0014887752040522173, "clip_ratio/low_min": 0.00024920992291299626, "clip_ratio/region_mean": 0.003287442756118253, "epoch": 1.858559346748323, "grad_norm": 0.11719462275505066, "learning_rate": 1e-06, "loss": -0.0462, "step": 376 }, { "clip_ratio/high_max": 0.003653102699900046, "clip_ratio/high_mean": 0.0014300389811978675, "clip_ratio/low_mean": 0.0019336772202223074, "clip_ratio/low_min": 0.00012109909857827006, "clip_ratio/region_mean": 0.0033637161686783656, "epoch": 1.8632254301545639, "grad_norm": 0.11154165863990784, "learning_rate": 1e-06, "loss": 0.0428, "step": 377 }, { "clip_ratio/high_max": 0.0038883319502929226, "clip_ratio/high_mean": 0.0016257368370133918, "clip_ratio/low_mean": 0.0017939439967449289, "clip_ratio/low_min": 0.0002152343495254172, "clip_ratio/region_mean": 0.003419680826482363, "epoch": 1.8678915135608047, "grad_norm": 0.10971345752477646, "learning_rate": 1e-06, "loss": -0.0068, "step": 378 }, { "clip_ratio/high_max": 0.0038878767081769183, "clip_ratio/high_mean": 0.0016080887471616734, "clip_ratio/low_mean": 0.0015735847664473113, "clip_ratio/low_min": 5.1741701099672355e-05, "clip_ratio/region_mean": 0.003181673411745578, "epoch": 1.8725575969670458, "grad_norm": 0.11119736731052399, "learning_rate": 1e-06, "loss": -0.0064, "step": 379 }, { "clip_ratio/high_max": 0.003721888417203445, "clip_ratio/high_mean": 0.001680263831076445, "clip_ratio/low_mean": 0.0016717045691621024, "clip_ratio/low_min": 6.486692564067198e-05, "clip_ratio/region_mean": 0.003351968312927056, "epoch": 1.8772236803732867, "grad_norm": 0.11024277657270432, "learning_rate": 1e-06, "loss": 0.0017, "step": 380 }, { "clip_ratio/high_max": 0.003542009784723632, "clip_ratio/high_mean": 0.0017832112353062257, "clip_ratio/low_mean": 0.0013568868071160978, "clip_ratio/low_min": 0.0001835613584262319, "clip_ratio/region_mean": 0.003140098044241313, "epoch": 1.8818897637795275, "grad_norm": 0.10931335389614105, "learning_rate": 1e-06, "loss": -0.0404, "step": 381 }, { "clip_ratio/high_max": 0.004412916503497399, "clip_ratio/high_mean": 0.001886008605652023, "clip_ratio/low_mean": 0.0012965114874532446, "clip_ratio/low_min": 1.6887328456505202e-05, "clip_ratio/region_mean": 0.0031825201731408015, "epoch": 1.8865558471857684, "grad_norm": 0.11360908299684525, "learning_rate": 1e-06, "loss": -0.0637, "step": 382 }, { "clip_ratio/high_max": 0.003960326328524388, "clip_ratio/high_mean": 0.0015011436080385465, "clip_ratio/low_mean": 0.0014402225497178733, "clip_ratio/low_min": 0.00015322612216550624, "clip_ratio/region_mean": 0.002941366139566526, "epoch": 1.8912219305920095, "grad_norm": 0.10976927727460861, "learning_rate": 1e-06, "loss": 0.0155, "step": 383 }, { "clip_ratio/high_max": 0.003886464415700175, "clip_ratio/high_mean": 0.0016368966280424502, "clip_ratio/low_mean": 0.0016564458092034329, "clip_ratio/low_min": 0.00012087353024980985, "clip_ratio/region_mean": 0.0032933423717622645, "epoch": 1.8958880139982504, "grad_norm": 0.11942066252231598, "learning_rate": 1e-06, "loss": 0.0299, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030552455357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 685.3372192382812, "completions/mean_terminated_length": 577.8489990234375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 2.004666083406241, "grad_norm": 0.1330665796995163, "learning_rate": 1e-06, "loss": -0.0066, "num_tokens": 121253577.0, "reward": 0.6083984375, "reward_std": 0.18795929849147797, "rewards/simpleverify_reward/mean": 0.6083984375, "rewards/simpleverify_reward/std": 0.48812538385391235, "step": 385 }, { "clip_ratio/high_max": 0.002078529607388191, "clip_ratio/high_mean": 0.000927934241190087, "clip_ratio/low_mean": 0.0004505121960391989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001378446446324233, "epoch": 2.0093321668124817, "grad_norm": 0.12242374569177628, "learning_rate": 1e-06, "loss": -0.0558, "step": 386 }, { "clip_ratio/high_max": 0.002174368783016689, "clip_ratio/high_mean": 0.0009199488122249022, "clip_ratio/low_mean": 0.0005688992951036198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014888480873196386, "epoch": 2.0139982502187226, "grad_norm": 0.13828489184379578, "learning_rate": 1e-06, "loss": 0.0362, "step": 387 }, { "clip_ratio/high_max": 0.002613817858218681, "clip_ratio/high_mean": 0.0010798559596878476, "clip_ratio/low_mean": 0.0005303921925587929, "clip_ratio/low_min": 3.116131301794667e-05, "clip_ratio/region_mean": 0.0016102481422421988, "epoch": 2.0186643336249634, "grad_norm": 0.12653018534183502, "learning_rate": 1e-06, "loss": -0.0728, "step": 388 }, { "clip_ratio/high_max": 0.0018697531195357442, "clip_ratio/high_mean": 0.0007625891303177923, "clip_ratio/low_mean": 0.0006168238396639936, "clip_ratio/low_min": 3.9324662793660536e-05, "clip_ratio/region_mean": 0.0013794129736197647, "epoch": 2.0233304170312043, "grad_norm": 0.11558361351490021, "learning_rate": 1e-06, "loss": -0.025, "step": 389 }, { "clip_ratio/high_max": 0.0020666335185524076, "clip_ratio/high_mean": 0.0008618993560958188, "clip_ratio/low_mean": 0.0007046818591334159, "clip_ratio/low_min": 2.6307861844543368e-05, "clip_ratio/region_mean": 0.001566581235238118, "epoch": 2.027996500437445, "grad_norm": 0.13028889894485474, "learning_rate": 1e-06, "loss": 0.012, "step": 390 }, { "clip_ratio/high_max": 0.002014424455410335, "clip_ratio/high_mean": 0.0008683326450409368, "clip_ratio/low_mean": 0.000848259442136623, "clip_ratio/low_min": 6.993009083089419e-05, "clip_ratio/region_mean": 0.0017165920871775597, "epoch": 2.032662583843686, "grad_norm": 0.17612363398075104, "learning_rate": 1e-06, "loss": 0.0136, "step": 391 }, { "clip_ratio/high_max": 0.0024020767777983565, "clip_ratio/high_mean": 0.0009875263131107204, "clip_ratio/low_mean": 0.0008998762186820386, "clip_ratio/low_min": 6.384871176123852e-05, "clip_ratio/region_mean": 0.0018874025117838755, "epoch": 2.037328667249927, "grad_norm": 0.12755659222602844, "learning_rate": 1e-06, "loss": -0.0212, "step": 392 }, { "clip_ratio/high_max": 0.0024498723360011354, "clip_ratio/high_mean": 0.0010138337456737645, "clip_ratio/low_mean": 0.0008478861273033544, "clip_ratio/low_min": 1.234689807461109e-05, "clip_ratio/region_mean": 0.0018617198584252037, "epoch": 2.041994750656168, "grad_norm": 0.11342108249664307, "learning_rate": 1e-06, "loss": 0.0033, "step": 393 }, { "clip_ratio/high_max": 0.0023018126012175344, "clip_ratio/high_mean": 0.0009311229514423758, "clip_ratio/low_mean": 0.000948539696764783, "clip_ratio/low_min": 8.847866683936445e-05, "clip_ratio/region_mean": 0.001879662690043915, "epoch": 2.046660834062409, "grad_norm": 0.12213778495788574, "learning_rate": 1e-06, "loss": -0.0179, "step": 394 }, { "clip_ratio/high_max": 0.002534204766561743, "clip_ratio/high_mean": 0.0010039549215434818, "clip_ratio/low_mean": 0.0009770175674930215, "clip_ratio/low_min": 2.0394843886606395e-05, "clip_ratio/region_mean": 0.0019809725126833655, "epoch": 2.05132691746865, "grad_norm": 0.13308873772621155, "learning_rate": 1e-06, "loss": 0.0124, "step": 395 }, { "clip_ratio/high_max": 0.002023231289058458, "clip_ratio/high_mean": 0.0008096847777778748, "clip_ratio/low_mean": 0.0011474332623038208, "clip_ratio/low_min": 0.00014910923164279666, "clip_ratio/region_mean": 0.0019571179946069606, "epoch": 2.055993000874891, "grad_norm": 0.13124310970306396, "learning_rate": 1e-06, "loss": 0.0595, "step": 396 }, { "clip_ratio/high_max": 0.002735275571467355, "clip_ratio/high_mean": 0.001185059987619752, "clip_ratio/low_mean": 0.0009641533688409254, "clip_ratio/low_min": 7.641969750693534e-05, "clip_ratio/region_mean": 0.002149213381926529, "epoch": 2.0606590842811316, "grad_norm": 0.12203901261091232, "learning_rate": 1e-06, "loss": -0.0026, "step": 397 }, { "clip_ratio/high_max": 0.0024362768526771106, "clip_ratio/high_mean": 0.0009537299538351363, "clip_ratio/low_mean": 0.0010708052013796987, "clip_ratio/low_min": 0.0001378891374770319, "clip_ratio/region_mean": 0.002024535191594623, "epoch": 2.0653251676873725, "grad_norm": 0.1257878690958023, "learning_rate": 1e-06, "loss": 0.0063, "step": 398 }, { "clip_ratio/high_max": 0.002449552484904416, "clip_ratio/high_mean": 0.0011017015058314428, "clip_ratio/low_mean": 0.0010120234292116947, "clip_ratio/low_min": 4.369905400380958e-05, "clip_ratio/region_mean": 0.0021137249132152647, "epoch": 2.0699912510936134, "grad_norm": 0.1251668483018875, "learning_rate": 1e-06, "loss": -0.0198, "step": 399 }, { "clip_ratio/high_max": 0.0021176970039959997, "clip_ratio/high_mean": 0.0010002863800764317, "clip_ratio/low_mean": 0.001083603136066813, "clip_ratio/low_min": 0.0001312460190092679, "clip_ratio/region_mean": 0.002083889441564679, "epoch": 2.0746573344998542, "grad_norm": 0.12752240896224976, "learning_rate": 1e-06, "loss": 0.0162, "step": 400 }, { "clip_ratio/high_max": 0.003637856731074862, "clip_ratio/high_mean": 0.0014870851700834464, "clip_ratio/low_mean": 0.0015769202000228688, "clip_ratio/low_min": 8.633852485218085e-05, "clip_ratio/region_mean": 0.003064005504711531, "epoch": 2.079323417906095, "grad_norm": 0.10936375707387924, "learning_rate": 1e-06, "loss": -0.0073, "step": 401 }, { "clip_ratio/high_max": 0.0035837859904859215, "clip_ratio/high_mean": 0.0015862231666687876, "clip_ratio/low_mean": 0.001181528055894887, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027677513062371872, "epoch": 2.083989501312336, "grad_norm": 0.10769211500883102, "learning_rate": 1e-06, "loss": -0.0565, "step": 402 }, { "clip_ratio/high_max": 0.003620268478698563, "clip_ratio/high_mean": 0.0015115561182028614, "clip_ratio/low_mean": 0.001711774202703964, "clip_ratio/low_min": 5.112231519888155e-05, "clip_ratio/region_mean": 0.0032233303136308677, "epoch": 2.088655584718577, "grad_norm": 0.11903084069490433, "learning_rate": 1e-06, "loss": 0.0354, "step": 403 }, { "clip_ratio/high_max": 0.004328246366640087, "clip_ratio/high_mean": 0.001920813763717888, "clip_ratio/low_mean": 0.0013620666031783912, "clip_ratio/low_min": 0.0002128134437953122, "clip_ratio/region_mean": 0.003282880366896279, "epoch": 2.0933216681248177, "grad_norm": 0.1078646183013916, "learning_rate": 1e-06, "loss": -0.0736, "step": 404 }, { "clip_ratio/high_max": 0.003231616494304035, "clip_ratio/high_mean": 0.0014701861873618327, "clip_ratio/low_mean": 0.001407937397743808, "clip_ratio/low_min": 5.243288615019992e-05, "clip_ratio/region_mean": 0.0028781236105714925, "epoch": 2.0979877515310585, "grad_norm": 0.10333307832479477, "learning_rate": 1e-06, "loss": -0.0256, "step": 405 }, { "clip_ratio/high_max": 0.003635908695287071, "clip_ratio/high_mean": 0.0015776671170897316, "clip_ratio/low_mean": 0.001695445302175358, "clip_ratio/low_min": 0.0001422545810783049, "clip_ratio/region_mean": 0.0032731124229030684, "epoch": 2.1026538349372994, "grad_norm": 0.11389170587062836, "learning_rate": 1e-06, "loss": 0.0112, "step": 406 }, { "clip_ratio/high_max": 0.0031745763481012546, "clip_ratio/high_mean": 0.0013398703995335381, "clip_ratio/low_mean": 0.0016521694087714422, "clip_ratio/low_min": 0.00010802012184285559, "clip_ratio/region_mean": 0.0029920397282694466, "epoch": 2.1073199183435403, "grad_norm": 0.10970176011323929, "learning_rate": 1e-06, "loss": 0.0129, "step": 407 }, { "clip_ratio/high_max": 0.004052613636304159, "clip_ratio/high_mean": 0.0017313632743025664, "clip_ratio/low_mean": 0.0016591520652582403, "clip_ratio/low_min": 0.00019382750360819045, "clip_ratio/region_mean": 0.0033905152376974, "epoch": 2.111986001749781, "grad_norm": 0.11538665741682053, "learning_rate": 1e-06, "loss": -0.0219, "step": 408 }, { "clip_ratio/high_max": 0.0037640930895577185, "clip_ratio/high_mean": 0.0015785114192112815, "clip_ratio/low_mean": 0.0015149501123232767, "clip_ratio/low_min": 6.529494748974685e-05, "clip_ratio/region_mean": 0.00309346155700041, "epoch": 2.116652085156022, "grad_norm": 0.09933169931173325, "learning_rate": 1e-06, "loss": 0.0026, "step": 409 }, { "clip_ratio/high_max": 0.0036888261674903333, "clip_ratio/high_mean": 0.0015189411169558298, "clip_ratio/low_mean": 0.0016641882066323888, "clip_ratio/low_min": 0.0001844062535383273, "clip_ratio/region_mean": 0.003183129330864176, "epoch": 2.121318168562263, "grad_norm": 0.10626459866762161, "learning_rate": 1e-06, "loss": -0.0186, "step": 410 }, { "clip_ratio/high_max": 0.0037286237566149794, "clip_ratio/high_mean": 0.0016477864810440224, "clip_ratio/low_mean": 0.0017910263013618533, "clip_ratio/low_min": 0.00012334531857050024, "clip_ratio/region_mean": 0.0034388128260616213, "epoch": 2.1259842519685037, "grad_norm": 0.12006929516792297, "learning_rate": 1e-06, "loss": 0.0116, "step": 411 }, { "clip_ratio/high_max": 0.003761015868803952, "clip_ratio/high_mean": 0.0014651818901256775, "clip_ratio/low_mean": 0.002208865902503021, "clip_ratio/low_min": 0.00035455115539662074, "clip_ratio/region_mean": 0.003674047715321649, "epoch": 2.130650335374745, "grad_norm": 0.11688859760761261, "learning_rate": 1e-06, "loss": 0.0587, "step": 412 }, { "clip_ratio/high_max": 0.0043728131568059325, "clip_ratio/high_mean": 0.0017421028860553633, "clip_ratio/low_mean": 0.0016818589210743085, "clip_ratio/low_min": 0.00010463003673066851, "clip_ratio/region_mean": 0.0034239617962157354, "epoch": 2.135316418780986, "grad_norm": 0.10648229718208313, "learning_rate": 1e-06, "loss": -0.0034, "step": 413 }, { "clip_ratio/high_max": 0.003689129662234336, "clip_ratio/high_mean": 0.0015235339596983977, "clip_ratio/low_mean": 0.0017985088379646186, "clip_ratio/low_min": 0.00022967340191826224, "clip_ratio/region_mean": 0.003322042743093334, "epoch": 2.1399825021872267, "grad_norm": 0.11225876957178116, "learning_rate": 1e-06, "loss": 0.0055, "step": 414 }, { "clip_ratio/high_max": 0.003691285601234995, "clip_ratio/high_mean": 0.0017477754154242575, "clip_ratio/low_mean": 0.0017051547329174355, "clip_ratio/low_min": 0.00014654442566097714, "clip_ratio/region_mean": 0.003452930206549354, "epoch": 2.1446485855934676, "grad_norm": 0.11444263905286789, "learning_rate": 1e-06, "loss": -0.0206, "step": 415 }, { "clip_ratio/high_max": 0.003370915168488864, "clip_ratio/high_mean": 0.0015085931081557646, "clip_ratio/low_mean": 0.001830224398872815, "clip_ratio/low_min": 0.00018205205469712382, "clip_ratio/region_mean": 0.003338817463372834, "epoch": 2.1493146689997085, "grad_norm": 0.10421435534954071, "learning_rate": 1e-06, "loss": 0.0154, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.032017299107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 671.1072387695312, "completions/mean_terminated_length": 557.8244018554688, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 2.1539807524059493, "grad_norm": 0.12873737514019012, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 130388770.0, "reward": 0.6077009439468384, "reward_std": 0.18060724437236786, "rewards/simpleverify_reward/mean": 0.6077008843421936, "rewards/simpleverify_reward/std": 0.488279789686203, "step": 417 }, { "clip_ratio/high_max": 0.0021688674460165203, "clip_ratio/high_mean": 0.0008151452038873686, "clip_ratio/low_mean": 0.0005665232865794678, "clip_ratio/low_min": 3.8096014577604365e-05, "clip_ratio/region_mean": 0.0013816685095662251, "epoch": 2.15864683581219, "grad_norm": 0.12198550999164581, "learning_rate": 1e-06, "loss": 0.0073, "step": 418 }, { "clip_ratio/high_max": 0.0020757316742674448, "clip_ratio/high_mean": 0.0009019742974487599, "clip_ratio/low_mean": 0.0005356578240025556, "clip_ratio/low_min": 1.7472742911195382e-05, "clip_ratio/region_mean": 0.0014376321269082837, "epoch": 2.163312919218431, "grad_norm": 0.13432927429676056, "learning_rate": 1e-06, "loss": 0.0151, "step": 419 }, { "clip_ratio/high_max": 0.002020560459641274, "clip_ratio/high_mean": 0.0008356838707186398, "clip_ratio/low_mean": 0.0005019247291784268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013376086026255507, "epoch": 2.167979002624672, "grad_norm": 0.12839467823505402, "learning_rate": 1e-06, "loss": -0.0178, "step": 420 }, { "clip_ratio/high_max": 0.002041657640802441, "clip_ratio/high_mean": 0.0008013724054762861, "clip_ratio/low_mean": 0.0005724491579712776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013738215748162474, "epoch": 2.1726450860309128, "grad_norm": 0.12452315539121628, "learning_rate": 1e-06, "loss": 0.0114, "step": 421 }, { "clip_ratio/high_max": 0.0018366385593253653, "clip_ratio/high_mean": 0.0008400008809985593, "clip_ratio/low_mean": 0.0007201819389592856, "clip_ratio/low_min": 3.400094374228502e-05, "clip_ratio/region_mean": 0.0015601828599756118, "epoch": 2.1773111694371536, "grad_norm": 0.13958384096622467, "learning_rate": 1e-06, "loss": 0.0153, "step": 422 }, { "clip_ratio/high_max": 0.002015688398387283, "clip_ratio/high_mean": 0.000833709207654465, "clip_ratio/low_mean": 0.0006359328890539473, "clip_ratio/low_min": 2.9350784643611405e-05, "clip_ratio/region_mean": 0.0014696421058033593, "epoch": 2.1819772528433945, "grad_norm": 0.14534762501716614, "learning_rate": 1e-06, "loss": 0.0098, "step": 423 }, { "clip_ratio/high_max": 0.0017657078024058137, "clip_ratio/high_mean": 0.0007911612119642086, "clip_ratio/low_mean": 0.000757917701776023, "clip_ratio/low_min": 0.00010911672507063486, "clip_ratio/region_mean": 0.0015490789191971999, "epoch": 2.1866433362496354, "grad_norm": 0.13252082467079163, "learning_rate": 1e-06, "loss": 0.0262, "step": 424 }, { "clip_ratio/high_max": 0.0022240313519432675, "clip_ratio/high_mean": 0.0009690386777947424, "clip_ratio/low_mean": 0.0006769015526515432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001645940250455169, "epoch": 2.1913094196558762, "grad_norm": 0.1220291256904602, "learning_rate": 1e-06, "loss": -0.0257, "step": 425 }, { "clip_ratio/high_max": 0.002485303550201934, "clip_ratio/high_mean": 0.0009092185991903534, "clip_ratio/low_mean": 0.0007724371753283776, "clip_ratio/low_min": 1.3691128515347373e-05, "clip_ratio/region_mean": 0.0016816557617858052, "epoch": 2.195975503062117, "grad_norm": 0.12133394181728363, "learning_rate": 1e-06, "loss": -0.0181, "step": 426 }, { "clip_ratio/high_max": 0.0024866541862138547, "clip_ratio/high_mean": 0.00095881270863174, "clip_ratio/low_mean": 0.000666898839881469, "clip_ratio/low_min": 3.0958125535107683e-05, "clip_ratio/region_mean": 0.0016257115348707885, "epoch": 2.200641586468358, "grad_norm": 0.12553566694259644, "learning_rate": 1e-06, "loss": -0.0343, "step": 427 }, { "clip_ratio/high_max": 0.002327847312699305, "clip_ratio/high_mean": 0.0009998264904425014, "clip_ratio/low_mean": 0.0006324681171463453, "clip_ratio/low_min": 2.616725214465987e-05, "clip_ratio/region_mean": 0.001632294624869246, "epoch": 2.205307669874599, "grad_norm": 0.13535764813423157, "learning_rate": 1e-06, "loss": -0.0511, "step": 428 }, { "clip_ratio/high_max": 0.0025342936569359154, "clip_ratio/high_mean": 0.0010408837733848486, "clip_ratio/low_mean": 0.0008872068847267656, "clip_ratio/low_min": 0.00011486966286611278, "clip_ratio/region_mean": 0.0019280906490166672, "epoch": 2.20997375328084, "grad_norm": 0.13542316854000092, "learning_rate": 1e-06, "loss": -0.0035, "step": 429 }, { "clip_ratio/high_max": 0.002540831257647369, "clip_ratio/high_mean": 0.00099457749092835, "clip_ratio/low_mean": 0.000833474343380658, "clip_ratio/low_min": 8.961288676800905e-05, "clip_ratio/region_mean": 0.0018280518415849656, "epoch": 2.214639836687081, "grad_norm": 0.1258094757795334, "learning_rate": 1e-06, "loss": -0.0245, "step": 430 }, { "clip_ratio/high_max": 0.0022217575897229835, "clip_ratio/high_mean": 0.0008792716944299173, "clip_ratio/low_mean": 0.00093852799909655, "clip_ratio/low_min": 6.621166448894655e-05, "clip_ratio/region_mean": 0.0018177997117163613, "epoch": 2.219305920093322, "grad_norm": 0.1328386515378952, "learning_rate": 1e-06, "loss": -0.0048, "step": 431 }, { "clip_ratio/high_max": 0.0024301523517351598, "clip_ratio/high_mean": 0.00101914665356162, "clip_ratio/low_mean": 0.0010499246054678224, "clip_ratio/low_min": 9.598919223208213e-05, "clip_ratio/region_mean": 0.002069071306323167, "epoch": 2.2239720034995627, "grad_norm": 0.14714941382408142, "learning_rate": 1e-06, "loss": -0.0078, "step": 432 }, { "clip_ratio/high_max": 0.004045515117468312, "clip_ratio/high_mean": 0.0014495478826574981, "clip_ratio/low_mean": 0.0014891990686010104, "clip_ratio/low_min": 4.9229449359700084e-05, "clip_ratio/region_mean": 0.0029387469112407416, "epoch": 2.2286380869058036, "grad_norm": 0.1016538143157959, "learning_rate": 1e-06, "loss": 0.0027, "step": 433 }, { "clip_ratio/high_max": 0.004073299947776832, "clip_ratio/high_mean": 0.0015483728384424467, "clip_ratio/low_mean": 0.0016881891424418427, "clip_ratio/low_min": 0.0001257203439308796, "clip_ratio/region_mean": 0.0032365619990741834, "epoch": 2.2333041703120444, "grad_norm": 0.10631687194108963, "learning_rate": 1e-06, "loss": 0.0066, "step": 434 }, { "clip_ratio/high_max": 0.00381794516579248, "clip_ratio/high_mean": 0.0015347961962106638, "clip_ratio/low_mean": 0.0018125336719094776, "clip_ratio/low_min": 0.00010464232400408946, "clip_ratio/region_mean": 0.0033473298681201413, "epoch": 2.2379702537182853, "grad_norm": 0.1090458333492279, "learning_rate": 1e-06, "loss": 0.0142, "step": 435 }, { "clip_ratio/high_max": 0.0034262624685652554, "clip_ratio/high_mean": 0.001429756406650995, "clip_ratio/low_mean": 0.0015347723965533078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029645288159372285, "epoch": 2.242636337124526, "grad_norm": 0.10741777718067169, "learning_rate": 1e-06, "loss": -0.0186, "step": 436 }, { "clip_ratio/high_max": 0.0035252406960353255, "clip_ratio/high_mean": 0.0013253975375846494, "clip_ratio/low_mean": 0.0015984931960701942, "clip_ratio/low_min": 4.451566928764805e-05, "clip_ratio/region_mean": 0.002923890635429416, "epoch": 2.247302420530767, "grad_norm": 0.10843174159526825, "learning_rate": 1e-06, "loss": 0.0107, "step": 437 }, { "clip_ratio/high_max": 0.003338203954626806, "clip_ratio/high_mean": 0.0015138691378524527, "clip_ratio/low_mean": 0.0018626106248120777, "clip_ratio/low_min": 5.695778600056656e-05, "clip_ratio/region_mean": 0.0033764796826289967, "epoch": 2.251968503937008, "grad_norm": 0.11338115483522415, "learning_rate": 1e-06, "loss": 0.0144, "step": 438 }, { "clip_ratio/high_max": 0.0035849112027790397, "clip_ratio/high_mean": 0.0014993317763583036, "clip_ratio/low_mean": 0.0015981576616468374, "clip_ratio/low_min": 0.0002949760055344086, "clip_ratio/region_mean": 0.0030974893816164695, "epoch": 2.2566345873432487, "grad_norm": 0.1093079000711441, "learning_rate": 1e-06, "loss": 0.009, "step": 439 }, { "clip_ratio/high_max": 0.0032063937760540284, "clip_ratio/high_mean": 0.001391851870721439, "clip_ratio/low_mean": 0.001739134757372085, "clip_ratio/low_min": 0.000219652940359083, "clip_ratio/region_mean": 0.003130986624455545, "epoch": 2.2613006707494896, "grad_norm": 0.1108342856168747, "learning_rate": 1e-06, "loss": 0.0253, "step": 440 }, { "clip_ratio/high_max": 0.0036253995494917035, "clip_ratio/high_mean": 0.0015476660410058685, "clip_ratio/low_mean": 0.0014662232515547657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030138892616378143, "epoch": 2.2659667541557305, "grad_norm": 0.10288877040147781, "learning_rate": 1e-06, "loss": -0.0264, "step": 441 }, { "clip_ratio/high_max": 0.0037480466125998646, "clip_ratio/high_mean": 0.0016365422488888726, "clip_ratio/low_mean": 0.00156330502795754, "clip_ratio/low_min": 8.596474799560383e-05, "clip_ratio/region_mean": 0.003199847327778116, "epoch": 2.2706328375619713, "grad_norm": 0.10725315660238266, "learning_rate": 1e-06, "loss": -0.0188, "step": 442 }, { "clip_ratio/high_max": 0.004500964569160715, "clip_ratio/high_mean": 0.001705740451143356, "clip_ratio/low_mean": 0.0012819714902434498, "clip_ratio/low_min": 8.288236676889937e-05, "clip_ratio/region_mean": 0.0029877119523007423, "epoch": 2.275298920968212, "grad_norm": 0.10594787448644638, "learning_rate": 1e-06, "loss": -0.0351, "step": 443 }, { "clip_ratio/high_max": 0.004100797508726828, "clip_ratio/high_mean": 0.001740014151437208, "clip_ratio/low_mean": 0.001421985107299406, "clip_ratio/low_min": 6.356195262924302e-05, "clip_ratio/region_mean": 0.0031619991932529956, "epoch": 2.279965004374453, "grad_norm": 0.10533588379621506, "learning_rate": 1e-06, "loss": -0.0518, "step": 444 }, { "clip_ratio/high_max": 0.004400987891131081, "clip_ratio/high_mean": 0.0017435043409932405, "clip_ratio/low_mean": 0.0017768207253539003, "clip_ratio/low_min": 0.0002692307716642972, "clip_ratio/region_mean": 0.003520325044519268, "epoch": 2.284631087780694, "grad_norm": 0.11279461532831192, "learning_rate": 1e-06, "loss": -0.0043, "step": 445 }, { "clip_ratio/high_max": 0.004288588956114836, "clip_ratio/high_mean": 0.001777282464900054, "clip_ratio/low_mean": 0.001472322539484594, "clip_ratio/low_min": 0.00015669946697016712, "clip_ratio/region_mean": 0.0032496050553163514, "epoch": 2.289297171186935, "grad_norm": 0.10840941220521927, "learning_rate": 1e-06, "loss": -0.0253, "step": 446 }, { "clip_ratio/high_max": 0.004287127871066332, "clip_ratio/high_mean": 0.0015566591318929568, "clip_ratio/low_mean": 0.0017069350724341348, "clip_ratio/low_min": 9.460927503823768e-05, "clip_ratio/region_mean": 0.0032635941461194307, "epoch": 2.2939632545931756, "grad_norm": 0.10871279239654541, "learning_rate": 1e-06, "loss": -0.0057, "step": 447 }, { "clip_ratio/high_max": 0.0036741375806741416, "clip_ratio/high_mean": 0.001712800654786406, "clip_ratio/low_mean": 0.0018228745502710808, "clip_ratio/low_min": 0.00013380825657804962, "clip_ratio/region_mean": 0.003535675161401741, "epoch": 2.298629337999417, "grad_norm": 0.1089814305305481, "learning_rate": 1e-06, "loss": -0.0087, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.031947544642857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4000.0, "completions/mean_length": 675.1322631835938, "completions/mean_terminated_length": 562.2372436523438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 2.303295421405658, "grad_norm": 0.13560453057289124, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 139616098.0, "reward": 0.6134207844734192, "reward_std": 0.18269461393356323, "rewards/simpleverify_reward/mean": 0.6134207844734192, "rewards/simpleverify_reward/std": 0.4869828224182129, "step": 449 }, { "clip_ratio/high_max": 0.002662798229721375, "clip_ratio/high_mean": 0.0010066003178508254, "clip_ratio/low_mean": 0.0005860599876541528, "clip_ratio/low_min": 1.711391087155789e-05, "clip_ratio/region_mean": 0.0015926602864055894, "epoch": 2.3079615048118987, "grad_norm": 0.1373986303806305, "learning_rate": 1e-06, "loss": 0.0097, "step": 450 }, { "clip_ratio/high_max": 0.0020532387716230005, "clip_ratio/high_mean": 0.0008312873451359337, "clip_ratio/low_mean": 0.0006628893934248481, "clip_ratio/low_min": 6.048646628187271e-05, "clip_ratio/region_mean": 0.0014941767585696653, "epoch": 2.3126275882181395, "grad_norm": 0.14679957926273346, "learning_rate": 1e-06, "loss": 0.0176, "step": 451 }, { "clip_ratio/high_max": 0.002346332825254649, "clip_ratio/high_mean": 0.0008792419430392329, "clip_ratio/low_mean": 0.0006438411310227821, "clip_ratio/low_min": 5.002196212444687e-05, "clip_ratio/region_mean": 0.0015230830758810043, "epoch": 2.3172936716243804, "grad_norm": 0.15684324502944946, "learning_rate": 1e-06, "loss": -0.0145, "step": 452 }, { "clip_ratio/high_max": 0.0017245245508092921, "clip_ratio/high_mean": 0.0007756291270197835, "clip_ratio/low_mean": 0.000635628761301632, "clip_ratio/low_min": 3.6554008147504646e-05, "clip_ratio/region_mean": 0.001411257868312532, "epoch": 2.3219597550306212, "grad_norm": 0.12472525238990784, "learning_rate": 1e-06, "loss": -0.0057, "step": 453 }, { "clip_ratio/high_max": 0.001852979195973603, "clip_ratio/high_mean": 0.0008053446053963853, "clip_ratio/low_mean": 0.0007463751599061652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015517197680310346, "epoch": 2.326625838436862, "grad_norm": 0.1205039843916893, "learning_rate": 1e-06, "loss": 0.0192, "step": 454 }, { "clip_ratio/high_max": 0.0019482991265249439, "clip_ratio/high_mean": 0.0007998351611604448, "clip_ratio/low_mean": 0.0007874399507272756, "clip_ratio/low_min": 3.118762469966896e-05, "clip_ratio/region_mean": 0.001587275073688943, "epoch": 2.331291921843103, "grad_norm": 0.1338139921426773, "learning_rate": 1e-06, "loss": 0.007, "step": 455 }, { "clip_ratio/high_max": 0.002057045930996537, "clip_ratio/high_mean": 0.0008334159028891008, "clip_ratio/low_mean": 0.0008740925368329044, "clip_ratio/low_min": 9.247234447684605e-05, "clip_ratio/region_mean": 0.0017075084324460477, "epoch": 2.335958005249344, "grad_norm": 0.12611453235149384, "learning_rate": 1e-06, "loss": -0.0007, "step": 456 }, { "clip_ratio/high_max": 0.002171009764424525, "clip_ratio/high_mean": 0.0008809429327811813, "clip_ratio/low_mean": 0.0008542825289623579, "clip_ratio/low_min": 8.190461539925309e-05, "clip_ratio/region_mean": 0.0017352254217257723, "epoch": 2.3406240886555847, "grad_norm": 0.1341463327407837, "learning_rate": 1e-06, "loss": -0.0094, "step": 457 }, { "clip_ratio/high_max": 0.0023732053887215443, "clip_ratio/high_mean": 0.000965045075645321, "clip_ratio/low_mean": 0.000761050605433411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017260956919926684, "epoch": 2.3452901720618256, "grad_norm": 0.15129999816417694, "learning_rate": 1e-06, "loss": -0.0069, "step": 458 }, { "clip_ratio/high_max": 0.0028331815701676533, "clip_ratio/high_mean": 0.0011413144566176925, "clip_ratio/low_mean": 0.0009408555924892426, "clip_ratio/low_min": 3.997015664936043e-05, "clip_ratio/region_mean": 0.0020821700454689562, "epoch": 2.3499562554680664, "grad_norm": 0.1275661438703537, "learning_rate": 1e-06, "loss": -0.0079, "step": 459 }, { "clip_ratio/high_max": 0.0024318585856235586, "clip_ratio/high_mean": 0.0009451883579458809, "clip_ratio/low_mean": 0.0007190724145402783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016642607661196962, "epoch": 2.3546223388743073, "grad_norm": 0.11729730665683746, "learning_rate": 1e-06, "loss": -0.0198, "step": 460 }, { "clip_ratio/high_max": 0.002341119004995562, "clip_ratio/high_mean": 0.0009724948085931828, "clip_ratio/low_mean": 0.0007580633891848265, "clip_ratio/low_min": 2.6749410608317703e-05, "clip_ratio/region_mean": 0.001730558229610324, "epoch": 2.359288422280548, "grad_norm": 0.13335779309272766, "learning_rate": 1e-06, "loss": 0.0096, "step": 461 }, { "clip_ratio/high_max": 0.0022573262394871563, "clip_ratio/high_mean": 0.0008976729377536685, "clip_ratio/low_mean": 0.0009814268378249835, "clip_ratio/low_min": 3.0986700039647985e-05, "clip_ratio/region_mean": 0.0018790997346513905, "epoch": 2.363954505686789, "grad_norm": 0.12357683479785919, "learning_rate": 1e-06, "loss": 0.0343, "step": 462 }, { "clip_ratio/high_max": 0.0033517487318022177, "clip_ratio/high_mean": 0.0013386930950218812, "clip_ratio/low_mean": 0.0008741458204895025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022128388955025002, "epoch": 2.36862058909303, "grad_norm": 0.13469386100769043, "learning_rate": 1e-06, "loss": -0.0441, "step": 463 }, { "clip_ratio/high_max": 0.0025744532686076127, "clip_ratio/high_mean": 0.001144655772804981, "clip_ratio/low_mean": 0.0009500453143118648, "clip_ratio/low_min": 4.818975867237896e-05, "clip_ratio/region_mean": 0.002094701078021899, "epoch": 2.3732866724992707, "grad_norm": 0.12226388603448868, "learning_rate": 1e-06, "loss": -0.0329, "step": 464 }, { "clip_ratio/high_max": 0.004671756469178945, "clip_ratio/high_mean": 0.001799834550183732, "clip_ratio/low_mean": 0.001565938462590566, "clip_ratio/low_min": 2.939793012046721e-05, "clip_ratio/region_mean": 0.003365772936376743, "epoch": 2.377952755905512, "grad_norm": 0.10790296643972397, "learning_rate": 1e-06, "loss": -0.003, "step": 465 }, { "clip_ratio/high_max": 0.00415375752345426, "clip_ratio/high_mean": 0.0016365406627301127, "clip_ratio/low_mean": 0.0017844852081907447, "clip_ratio/low_min": 6.052293065295089e-05, "clip_ratio/region_mean": 0.003421025918214582, "epoch": 2.382618839311753, "grad_norm": 0.11333378404378891, "learning_rate": 1e-06, "loss": 0.0088, "step": 466 }, { "clip_ratio/high_max": 0.0034713881759671494, "clip_ratio/high_mean": 0.0015430484563694336, "clip_ratio/low_mean": 0.0017231767596967984, "clip_ratio/low_min": 9.146076445176732e-05, "clip_ratio/region_mean": 0.003266225176048465, "epoch": 2.3872849227179938, "grad_norm": 0.12146488577127457, "learning_rate": 1e-06, "loss": 0.0167, "step": 467 }, { "clip_ratio/high_max": 0.0036573573743226007, "clip_ratio/high_mean": 0.0015316211720346473, "clip_ratio/low_mean": 0.001459345188777661, "clip_ratio/low_min": 8.820857601676835e-05, "clip_ratio/region_mean": 0.002990966459037736, "epoch": 2.3919510061242346, "grad_norm": 0.10335852950811386, "learning_rate": 1e-06, "loss": -0.0153, "step": 468 }, { "clip_ratio/high_max": 0.0034409155559842475, "clip_ratio/high_mean": 0.0014419471262954175, "clip_ratio/low_mean": 0.0014170799549901858, "clip_ratio/low_min": 0.00013716963712795405, "clip_ratio/region_mean": 0.002859027103113476, "epoch": 2.3966170895304755, "grad_norm": 0.10907674580812454, "learning_rate": 1e-06, "loss": -0.0065, "step": 469 }, { "clip_ratio/high_max": 0.003288904183136765, "clip_ratio/high_mean": 0.0013959249918116257, "clip_ratio/low_mean": 0.0015320188313125982, "clip_ratio/low_min": 1.6855447029229254e-05, "clip_ratio/region_mean": 0.002927943831309676, "epoch": 2.4012831729367163, "grad_norm": 0.10341474413871765, "learning_rate": 1e-06, "loss": 0.0184, "step": 470 }, { "clip_ratio/high_max": 0.0037991502176737413, "clip_ratio/high_mean": 0.0015618669840478105, "clip_ratio/low_mean": 0.0015101758908713236, "clip_ratio/low_min": 0.00010493427816982148, "clip_ratio/region_mean": 0.0030720428985659964, "epoch": 2.405949256342957, "grad_norm": 0.11208900064229965, "learning_rate": 1e-06, "loss": 0.0062, "step": 471 }, { "clip_ratio/high_max": 0.003399367029487621, "clip_ratio/high_mean": 0.0014939034008421004, "clip_ratio/low_mean": 0.0016324436874128878, "clip_ratio/low_min": 0.00018419170555716846, "clip_ratio/region_mean": 0.003126347146462649, "epoch": 2.410615339749198, "grad_norm": 0.10840333253145218, "learning_rate": 1e-06, "loss": -0.0015, "step": 472 }, { "clip_ratio/high_max": 0.003826958723948337, "clip_ratio/high_mean": 0.0016156801648321562, "clip_ratio/low_mean": 0.0017929284913407173, "clip_ratio/low_min": 0.00014283613563748077, "clip_ratio/region_mean": 0.0034086086525348946, "epoch": 2.415281423155439, "grad_norm": 0.11251238733530045, "learning_rate": 1e-06, "loss": -0.0103, "step": 473 }, { "clip_ratio/high_max": 0.00387181145924842, "clip_ratio/high_mean": 0.0015990150968718808, "clip_ratio/low_mean": 0.0015509948789258488, "clip_ratio/low_min": 1.8876471585826948e-05, "clip_ratio/region_mean": 0.0031500099430559203, "epoch": 2.41994750656168, "grad_norm": 0.10769617557525635, "learning_rate": 1e-06, "loss": -0.0077, "step": 474 }, { "clip_ratio/high_max": 0.004455184505786747, "clip_ratio/high_mean": 0.001851320979767479, "clip_ratio/low_mean": 0.0019092342117801309, "clip_ratio/low_min": 5.718206739402376e-05, "clip_ratio/region_mean": 0.003760555264307186, "epoch": 2.4246135899679206, "grad_norm": 0.11215966194868088, "learning_rate": 1e-06, "loss": -0.0088, "step": 475 }, { "clip_ratio/high_max": 0.003923103271517903, "clip_ratio/high_mean": 0.0016443180502392352, "clip_ratio/low_mean": 0.0014249700361688156, "clip_ratio/low_min": 0.00012167021486675367, "clip_ratio/region_mean": 0.003069288097321987, "epoch": 2.4292796733741615, "grad_norm": 0.10074469447135925, "learning_rate": 1e-06, "loss": -0.0205, "step": 476 }, { "clip_ratio/high_max": 0.004374944735900499, "clip_ratio/high_mean": 0.0017573763761902228, "clip_ratio/low_mean": 0.0016272440589091275, "clip_ratio/low_min": 6.687353015877306e-05, "clip_ratio/region_mean": 0.0033846204460132867, "epoch": 2.4339457567804024, "grad_norm": 0.1102832704782486, "learning_rate": 1e-06, "loss": 0.0088, "step": 477 }, { "clip_ratio/high_max": 0.00414867246945505, "clip_ratio/high_mean": 0.0014914937546564033, "clip_ratio/low_mean": 0.0018508052453398705, "clip_ratio/low_min": 8.579262976127211e-05, "clip_ratio/region_mean": 0.0033422989363316447, "epoch": 2.4386118401866432, "grad_norm": 0.10344887524843216, "learning_rate": 1e-06, "loss": 0.0335, "step": 478 }, { "clip_ratio/high_max": 0.005057324873632751, "clip_ratio/high_mean": 0.002148227460565977, "clip_ratio/low_mean": 0.0016917338070925325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038399612676585093, "epoch": 2.443277923592884, "grad_norm": 0.11644785106182098, "learning_rate": 1e-06, "loss": -0.0449, "step": 479 }, { "clip_ratio/high_max": 0.004269847355317324, "clip_ratio/high_mean": 0.0018749643277260475, "clip_ratio/low_mean": 0.0016782127677288372, "clip_ratio/low_min": 0.0001598740491317585, "clip_ratio/region_mean": 0.003553177055437118, "epoch": 2.447944006999125, "grad_norm": 0.10135716199874878, "learning_rate": 1e-06, "loss": -0.0337, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0374581473214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 705.0792846679688, "completions/mean_terminated_length": 573.1185913085938, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 2.452610090405366, "grad_norm": 0.12852303683757782, "learning_rate": 1e-06, "loss": 0.0317, "num_tokens": 148976402.0, "reward": 0.607979953289032, "reward_std": 0.18433348834514618, "rewards/simpleverify_reward/mean": 0.6079798936843872, "rewards/simpleverify_reward/std": 0.48821812868118286, "step": 481 }, { "clip_ratio/high_max": 0.0021084692125441507, "clip_ratio/high_mean": 0.0008005121344467625, "clip_ratio/low_mean": 0.000607643460170948, "clip_ratio/low_min": 2.724874775594799e-05, "clip_ratio/region_mean": 0.0014081555455049966, "epoch": 2.457276173811607, "grad_norm": 0.1232134997844696, "learning_rate": 1e-06, "loss": -0.0181, "step": 482 }, { "clip_ratio/high_max": 0.0019168363505741581, "clip_ratio/high_mean": 0.0008189441050490132, "clip_ratio/low_mean": 0.0006217357731657103, "clip_ratio/low_min": 3.1742001738166437e-05, "clip_ratio/region_mean": 0.001440679894585628, "epoch": 2.4619422572178475, "grad_norm": 0.1317141354084015, "learning_rate": 1e-06, "loss": -0.0012, "step": 483 }, { "clip_ratio/high_max": 0.0020278677548049018, "clip_ratio/high_mean": 0.0008180289787560469, "clip_ratio/low_mean": 0.0006180010841490002, "clip_ratio/low_min": 5.0158341764472425e-05, "clip_ratio/region_mean": 0.0014360300338012166, "epoch": 2.466608340624089, "grad_norm": 0.12922532856464386, "learning_rate": 1e-06, "loss": -0.0125, "step": 484 }, { "clip_ratio/high_max": 0.0019680234327097423, "clip_ratio/high_mean": 0.0007916820950413239, "clip_ratio/low_mean": 0.0007306614625122165, "clip_ratio/low_min": 0.00012786738079739735, "clip_ratio/region_mean": 0.0015223435693769716, "epoch": 2.4712744240303297, "grad_norm": 0.14038164913654327, "learning_rate": 1e-06, "loss": 0.0214, "step": 485 }, { "clip_ratio/high_max": 0.0025270534606534056, "clip_ratio/high_mean": 0.0010848390593309887, "clip_ratio/low_mean": 0.0007770573247398715, "clip_ratio/low_min": 6.573472091986332e-05, "clip_ratio/region_mean": 0.0018618964386405423, "epoch": 2.4759405074365706, "grad_norm": 0.13785941898822784, "learning_rate": 1e-06, "loss": -0.0327, "step": 486 }, { "clip_ratio/high_max": 0.0022735283491783775, "clip_ratio/high_mean": 0.0009832481373450719, "clip_ratio/low_mean": 0.0008201137879950693, "clip_ratio/low_min": 1.4599392670788802e-05, "clip_ratio/region_mean": 0.001803361956262961, "epoch": 2.4806065908428114, "grad_norm": 0.13082294166088104, "learning_rate": 1e-06, "loss": -0.0369, "step": 487 }, { "clip_ratio/high_max": 0.0020144440277363174, "clip_ratio/high_mean": 0.0008189416512323078, "clip_ratio/low_mean": 0.0007431305002683075, "clip_ratio/low_min": 1.236399566550972e-05, "clip_ratio/region_mean": 0.0015620721314917319, "epoch": 2.4852726742490523, "grad_norm": 0.11946125328540802, "learning_rate": 1e-06, "loss": -0.025, "step": 488 }, { "clip_ratio/high_max": 0.0023797392132109962, "clip_ratio/high_mean": 0.0009919476651703008, "clip_ratio/low_mean": 0.0008940879906731425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018860356649383903, "epoch": 2.489938757655293, "grad_norm": 0.13558228313922882, "learning_rate": 1e-06, "loss": -0.0014, "step": 489 }, { "clip_ratio/high_max": 0.0021597225568257272, "clip_ratio/high_mean": 0.0009490915035712533, "clip_ratio/low_mean": 0.0008577941516705323, "clip_ratio/low_min": 4.657228055293672e-05, "clip_ratio/region_mean": 0.0018068856588797644, "epoch": 2.494604841061534, "grad_norm": 0.1347866654396057, "learning_rate": 1e-06, "loss": -0.0099, "step": 490 }, { "clip_ratio/high_max": 0.002171225831261836, "clip_ratio/high_mean": 0.0008730764202482533, "clip_ratio/low_mean": 0.0010358804611314554, "clip_ratio/low_min": 6.224335811566561e-05, "clip_ratio/region_mean": 0.0019089569250354543, "epoch": 2.499270924467775, "grad_norm": 0.14114463329315186, "learning_rate": 1e-06, "loss": 0.022, "step": 491 }, { "clip_ratio/high_max": 0.0021842748901690356, "clip_ratio/high_mean": 0.0008533395957783796, "clip_ratio/low_mean": 0.0008676701436343137, "clip_ratio/low_min": 3.713897876878036e-05, "clip_ratio/region_mean": 0.0017210097139468417, "epoch": 2.5039370078740157, "grad_norm": 0.12093949317932129, "learning_rate": 1e-06, "loss": -0.0087, "step": 492 }, { "clip_ratio/high_max": 0.0028033791968482547, "clip_ratio/high_mean": 0.0011462533238955075, "clip_ratio/low_mean": 0.000855478972880519, "clip_ratio/low_min": 9.531225532555254e-05, "clip_ratio/region_mean": 0.002001732304051984, "epoch": 2.5086030912802566, "grad_norm": 0.12668496370315552, "learning_rate": 1e-06, "loss": -0.0114, "step": 493 }, { "clip_ratio/high_max": 0.0021759943701908924, "clip_ratio/high_mean": 0.0009915331083902856, "clip_ratio/low_mean": 0.0009056482849700842, "clip_ratio/low_min": 4.5057275201543234e-05, "clip_ratio/region_mean": 0.001897181380627444, "epoch": 2.5132691746864975, "grad_norm": 0.13443271815776825, "learning_rate": 1e-06, "loss": 0.0273, "step": 494 }, { "clip_ratio/high_max": 0.002347228000871837, "clip_ratio/high_mean": 0.000997881365037756, "clip_ratio/low_mean": 0.0008956998663052218, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018935812258860096, "epoch": 2.5179352580927383, "grad_norm": 0.12433648854494095, "learning_rate": 1e-06, "loss": -0.0449, "step": 495 }, { "clip_ratio/high_max": 0.0024005414088605903, "clip_ratio/high_mean": 0.0009374722117172496, "clip_ratio/low_mean": 0.0009103637257794617, "clip_ratio/low_min": 3.594019653974101e-05, "clip_ratio/region_mean": 0.00184783587246784, "epoch": 2.522601341498979, "grad_norm": 0.1413072794675827, "learning_rate": 1e-06, "loss": 0.0159, "step": 496 }, { "clip_ratio/high_max": 0.003674121995572932, "clip_ratio/high_mean": 0.0014186388725647703, "clip_ratio/low_mean": 0.0017598874073883053, "clip_ratio/low_min": 8.525672819814645e-05, "clip_ratio/region_mean": 0.0031785261671757326, "epoch": 2.52726742490522, "grad_norm": 0.09868941456079483, "learning_rate": 1e-06, "loss": 0.0311, "step": 497 }, { "clip_ratio/high_max": 0.003550434521457646, "clip_ratio/high_mean": 0.0014151476934785023, "clip_ratio/low_mean": 0.0015810614640940912, "clip_ratio/low_min": 0.00021174636276555248, "clip_ratio/region_mean": 0.0029962091794004664, "epoch": 2.531933508311461, "grad_norm": 0.10350857675075531, "learning_rate": 1e-06, "loss": -0.0189, "step": 498 }, { "clip_ratio/high_max": 0.003506194698275067, "clip_ratio/high_mean": 0.0014947397576179355, "clip_ratio/low_mean": 0.0016258524665317964, "clip_ratio/low_min": 0.00010182085861742962, "clip_ratio/region_mean": 0.0031205921550281346, "epoch": 2.536599591717702, "grad_norm": 0.11277589946985245, "learning_rate": 1e-06, "loss": -0.002, "step": 499 }, { "clip_ratio/high_max": 0.003773927062866278, "clip_ratio/high_mean": 0.001571871300257044, "clip_ratio/low_mean": 0.0017267957155127078, "clip_ratio/low_min": 7.919558538560523e-05, "clip_ratio/region_mean": 0.003298667012131773, "epoch": 2.5412656751239426, "grad_norm": 0.11263630539178848, "learning_rate": 1e-06, "loss": -0.0133, "step": 500 }, { "clip_ratio/high_max": 0.0039356502165901475, "clip_ratio/high_mean": 0.0015397098522953456, "clip_ratio/low_mean": 0.0017598910235392395, "clip_ratio/low_min": 0.0002002167202590499, "clip_ratio/region_mean": 0.0032996008230838925, "epoch": 2.545931758530184, "grad_norm": 0.11384733021259308, "learning_rate": 1e-06, "loss": 0.0205, "step": 501 }, { "clip_ratio/high_max": 0.004131684509047773, "clip_ratio/high_mean": 0.0018422975626890548, "clip_ratio/low_mean": 0.0018192712050222326, "clip_ratio/low_min": 0.00019052830793953035, "clip_ratio/region_mean": 0.003661568815005012, "epoch": 2.5505978419364244, "grad_norm": 0.11411489546298981, "learning_rate": 1e-06, "loss": -0.0337, "step": 502 }, { "clip_ratio/high_max": 0.00433524209074676, "clip_ratio/high_mean": 0.0018117061408702284, "clip_ratio/low_mean": 0.0017565114831086248, "clip_ratio/low_min": 4.3798176193377e-05, "clip_ratio/region_mean": 0.0035682175657711923, "epoch": 2.5552639253426657, "grad_norm": 0.11059143394231796, "learning_rate": 1e-06, "loss": -0.0377, "step": 503 }, { "clip_ratio/high_max": 0.003451166870945599, "clip_ratio/high_mean": 0.0014772715949220583, "clip_ratio/low_mean": 0.0016109920143208, "clip_ratio/low_min": 3.9388687582686543e-05, "clip_ratio/region_mean": 0.0030882636492606252, "epoch": 2.5599300087489065, "grad_norm": 0.10452582687139511, "learning_rate": 1e-06, "loss": -0.0258, "step": 504 }, { "clip_ratio/high_max": 0.004215137523715384, "clip_ratio/high_mean": 0.00173685846675653, "clip_ratio/low_mean": 0.0019297957078379113, "clip_ratio/low_min": 0.00016768705609138124, "clip_ratio/region_mean": 0.003666654141852632, "epoch": 2.5645960921551474, "grad_norm": 0.11398331820964813, "learning_rate": 1e-06, "loss": -0.0022, "step": 505 }, { "clip_ratio/high_max": 0.0038633923686575145, "clip_ratio/high_mean": 0.0016690843985998072, "clip_ratio/low_mean": 0.0017599783823243342, "clip_ratio/low_min": 7.389907659671735e-05, "clip_ratio/region_mean": 0.003429062824579887, "epoch": 2.5692621755613883, "grad_norm": 0.11204434931278229, "learning_rate": 1e-06, "loss": -0.0108, "step": 506 }, { "clip_ratio/high_max": 0.0037272492627380416, "clip_ratio/high_mean": 0.0015997838127077557, "clip_ratio/low_mean": 0.001997105748159811, "clip_ratio/low_min": 0.00018258004638482817, "clip_ratio/region_mean": 0.0035968895390396938, "epoch": 2.573928258967629, "grad_norm": 0.11457454413175583, "learning_rate": 1e-06, "loss": 0.0211, "step": 507 }, { "clip_ratio/high_max": 0.0034826722694560885, "clip_ratio/high_mean": 0.0014327403987408616, "clip_ratio/low_mean": 0.0015533028235950042, "clip_ratio/low_min": 0.00014523957179335412, "clip_ratio/region_mean": 0.0029860432114219293, "epoch": 2.57859434237387, "grad_norm": 0.1013416275382042, "learning_rate": 1e-06, "loss": -0.0094, "step": 508 }, { "clip_ratio/high_max": 0.003945011740142945, "clip_ratio/high_mean": 0.0017254103004233912, "clip_ratio/low_mean": 0.0017066509208234493, "clip_ratio/low_min": 0.0002684723103811848, "clip_ratio/region_mean": 0.0034320610866416246, "epoch": 2.583260425780111, "grad_norm": 0.10854203253984451, "learning_rate": 1e-06, "loss": -0.0122, "step": 509 }, { "clip_ratio/high_max": 0.003653533356555272, "clip_ratio/high_mean": 0.001625843327929033, "clip_ratio/low_mean": 0.0018112894831574522, "clip_ratio/low_min": 0.00012891844380646944, "clip_ratio/region_mean": 0.003437132829276379, "epoch": 2.5879265091863517, "grad_norm": 0.10890080779790878, "learning_rate": 1e-06, "loss": 0.0265, "step": 510 }, { "clip_ratio/high_max": 0.004187806960544549, "clip_ratio/high_mean": 0.0017740552866598591, "clip_ratio/low_mean": 0.0014635746265412308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032376299204770476, "epoch": 2.5925925925925926, "grad_norm": 0.1078527644276619, "learning_rate": 1e-06, "loss": -0.0456, "step": 511 }, { "clip_ratio/high_max": 0.003899440009263344, "clip_ratio/high_mean": 0.0015392374043585733, "clip_ratio/low_mean": 0.0017809617420425639, "clip_ratio/low_min": 8.25409370008856e-05, "clip_ratio/region_mean": 0.0033201991391251795, "epoch": 2.5972586759988334, "grad_norm": 0.10906153172254562, "learning_rate": 1e-06, "loss": 0.0151, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0350864955357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 693.477294921875, "completions/mean_terminated_length": 569.7536010742188, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.6019247594050743, "grad_norm": 0.1475260853767395, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 158284748.0, "reward": 0.6111189126968384, "reward_std": 0.18402212858200073, "rewards/simpleverify_reward/mean": 0.6111188530921936, "rewards/simpleverify_reward/std": 0.4875132739543915, "step": 513 }, { "clip_ratio/high_max": 0.0019652151349873748, "clip_ratio/high_mean": 0.0008320316374010872, "clip_ratio/low_mean": 0.0005833359300595475, "clip_ratio/low_min": 1.072593113349285e-05, "clip_ratio/region_mean": 0.0014153675183479208, "epoch": 2.606590842811315, "grad_norm": 0.11962283402681351, "learning_rate": 1e-06, "loss": 0.0187, "step": 514 }, { "clip_ratio/high_max": 0.002113870516041061, "clip_ratio/high_mean": 0.0009243134263670072, "clip_ratio/low_mean": 0.0005054532375652343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014297666857601143, "epoch": 2.611256926217556, "grad_norm": 0.12318944185972214, "learning_rate": 1e-06, "loss": -0.0669, "step": 515 }, { "clip_ratio/high_max": 0.0018610406150401104, "clip_ratio/high_mean": 0.0007785976849845611, "clip_ratio/low_mean": 0.0006265361207624665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001405133807566017, "epoch": 2.615923009623797, "grad_norm": 0.12739652395248413, "learning_rate": 1e-06, "loss": 0.0371, "step": 516 }, { "clip_ratio/high_max": 0.00208322043181397, "clip_ratio/high_mean": 0.0009511413118161727, "clip_ratio/low_mean": 0.0006188477600517217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015699891009717248, "epoch": 2.6205890930300377, "grad_norm": 0.13826248049736023, "learning_rate": 1e-06, "loss": -0.0026, "step": 517 }, { "clip_ratio/high_max": 0.0019689827895490453, "clip_ratio/high_mean": 0.000817127724076272, "clip_ratio/low_mean": 0.0005973431498205173, "clip_ratio/low_min": 1.781641913112253e-05, "clip_ratio/region_mean": 0.0014144708584353793, "epoch": 2.625255176436279, "grad_norm": 0.1283932477235794, "learning_rate": 1e-06, "loss": -0.0237, "step": 518 }, { "clip_ratio/high_max": 0.0023613442754140124, "clip_ratio/high_mean": 0.0009085352576221339, "clip_ratio/low_mean": 0.0007759288200759329, "clip_ratio/low_min": 4.847200125368545e-05, "clip_ratio/region_mean": 0.0016844640485942364, "epoch": 2.6299212598425195, "grad_norm": 0.13844038546085358, "learning_rate": 1e-06, "loss": 0.0214, "step": 519 }, { "clip_ratio/high_max": 0.002220612019300461, "clip_ratio/high_mean": 0.00092753521312261, "clip_ratio/low_mean": 0.0007337414181165514, "clip_ratio/low_min": 5.5997400522755925e-05, "clip_ratio/region_mean": 0.0016612766194157302, "epoch": 2.6345873432487608, "grad_norm": 0.13712015748023987, "learning_rate": 1e-06, "loss": 0.0096, "step": 520 }, { "clip_ratio/high_max": 0.0021743183315265924, "clip_ratio/high_mean": 0.0008042732570174849, "clip_ratio/low_mean": 0.0007825538923498243, "clip_ratio/low_min": 3.0854040232952684e-05, "clip_ratio/region_mean": 0.0015868271730141714, "epoch": 2.6392534266550016, "grad_norm": 0.126194030046463, "learning_rate": 1e-06, "loss": -0.0213, "step": 521 }, { "clip_ratio/high_max": 0.002162649812817108, "clip_ratio/high_mean": 0.0008732019650778966, "clip_ratio/low_mean": 0.0007293393955478678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016025413715397008, "epoch": 2.6439195100612425, "grad_norm": 0.11985906213521957, "learning_rate": 1e-06, "loss": -0.0225, "step": 522 }, { "clip_ratio/high_max": 0.0025311114732176065, "clip_ratio/high_mean": 0.0009471539833612042, "clip_ratio/low_mean": 0.0009047878102137474, "clip_ratio/low_min": 1.5031265320430975e-05, "clip_ratio/region_mean": 0.0018519418081268668, "epoch": 2.6485855934674833, "grad_norm": 0.12939739227294922, "learning_rate": 1e-06, "loss": 0.007, "step": 523 }, { "clip_ratio/high_max": 0.0025607263451092876, "clip_ratio/high_mean": 0.0010655549303919543, "clip_ratio/low_mean": 0.0008945987319748383, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.0019601537205744535, "epoch": 2.653251676873724, "grad_norm": 0.12821923196315765, "learning_rate": 1e-06, "loss": -0.0148, "step": 524 }, { "clip_ratio/high_max": 0.0021615474252030253, "clip_ratio/high_mean": 0.0010168604494538158, "clip_ratio/low_mean": 0.0007930749343358912, "clip_ratio/low_min": 4.620494382834295e-05, "clip_ratio/region_mean": 0.001809935401979601, "epoch": 2.657917760279965, "grad_norm": 0.13569261133670807, "learning_rate": 1e-06, "loss": -0.0196, "step": 525 }, { "clip_ratio/high_max": 0.0020904801494907588, "clip_ratio/high_mean": 0.0008589704539190279, "clip_ratio/low_mean": 0.000916332444830914, "clip_ratio/low_min": 3.3943462767638266e-05, "clip_ratio/region_mean": 0.0017753029314917512, "epoch": 2.662583843686206, "grad_norm": 0.11915679275989532, "learning_rate": 1e-06, "loss": 0.0151, "step": 526 }, { "clip_ratio/high_max": 0.002438196010189131, "clip_ratio/high_mean": 0.0009277621356886812, "clip_ratio/low_mean": 0.0010768672018457437, "clip_ratio/low_min": 0.00012085863363608951, "clip_ratio/region_mean": 0.002004629379371181, "epoch": 2.667249927092447, "grad_norm": 0.1261160969734192, "learning_rate": 1e-06, "loss": 0.013, "step": 527 }, { "clip_ratio/high_max": 0.002577171726443339, "clip_ratio/high_mean": 0.0011147954428452067, "clip_ratio/low_mean": 0.0009820488085097168, "clip_ratio/low_min": 4.5479733671527356e-05, "clip_ratio/region_mean": 0.0020968442840967327, "epoch": 2.6719160104986877, "grad_norm": 0.14079050719738007, "learning_rate": 1e-06, "loss": -0.0395, "step": 528 }, { "clip_ratio/high_max": 0.004909978495561518, "clip_ratio/high_mean": 0.0018630345657584257, "clip_ratio/low_mean": 0.0016284975754388142, "clip_ratio/low_min": 8.20120440039318e-05, "clip_ratio/region_mean": 0.0034915321666630916, "epoch": 2.6765820939049285, "grad_norm": 0.10812269896268845, "learning_rate": 1e-06, "loss": -0.0106, "step": 529 }, { "clip_ratio/high_max": 0.003492072835797444, "clip_ratio/high_mean": 0.0015238677005982026, "clip_ratio/low_mean": 0.0017204745818162337, "clip_ratio/low_min": 9.878124183160253e-05, "clip_ratio/region_mean": 0.0032443422969663516, "epoch": 2.6812481773111694, "grad_norm": 0.10290117561817169, "learning_rate": 1e-06, "loss": 0.018, "step": 530 }, { "clip_ratio/high_max": 0.004130755361984484, "clip_ratio/high_mean": 0.0017278304003411904, "clip_ratio/low_mean": 0.0013872321324015502, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031150625582085922, "epoch": 2.6859142607174102, "grad_norm": 0.10638885945081711, "learning_rate": 1e-06, "loss": -0.0677, "step": 531 }, { "clip_ratio/high_max": 0.0036738228154717945, "clip_ratio/high_mean": 0.0014542073986376636, "clip_ratio/low_mean": 0.0018049408572551329, "clip_ratio/low_min": 1.9177661670255475e-05, "clip_ratio/region_mean": 0.0032591483686701395, "epoch": 2.690580344123651, "grad_norm": 0.10722865909337997, "learning_rate": 1e-06, "loss": 0.0363, "step": 532 }, { "clip_ratio/high_max": 0.004179177354671992, "clip_ratio/high_mean": 0.001712921879516216, "clip_ratio/low_mean": 0.001874919304100331, "clip_ratio/low_min": 0.00018479634309187531, "clip_ratio/region_mean": 0.0035878411144949496, "epoch": 2.695246427529892, "grad_norm": 0.1133003756403923, "learning_rate": 1e-06, "loss": -0.0036, "step": 533 }, { "clip_ratio/high_max": 0.003596190952521283, "clip_ratio/high_mean": 0.0015738239271740895, "clip_ratio/low_mean": 0.0015485044787055813, "clip_ratio/low_min": 0.0001844694052124396, "clip_ratio/region_mean": 0.0031223283949657343, "epoch": 2.699912510936133, "grad_norm": 0.1069144681096077, "learning_rate": 1e-06, "loss": -0.0245, "step": 534 }, { "clip_ratio/high_max": 0.0038797700035502203, "clip_ratio/high_mean": 0.001496370736276731, "clip_ratio/low_mean": 0.0019469347898848355, "clip_ratio/low_min": 0.00012884446368843783, "clip_ratio/region_mean": 0.0034433055989211425, "epoch": 2.704578594342374, "grad_norm": 0.11485174298286438, "learning_rate": 1e-06, "loss": 0.0205, "step": 535 }, { "clip_ratio/high_max": 0.003940434180549346, "clip_ratio/high_mean": 0.0017118309042416513, "clip_ratio/low_mean": 0.0019194923661416396, "clip_ratio/low_min": 9.81588191280025e-05, "clip_ratio/region_mean": 0.0036313232994871214, "epoch": 2.7092446777486145, "grad_norm": 0.10777334868907928, "learning_rate": 1e-06, "loss": 0.0088, "step": 536 }, { "clip_ratio/high_max": 0.004144428370636888, "clip_ratio/high_mean": 0.001616441943042446, "clip_ratio/low_mean": 0.0017348838337056804, "clip_ratio/low_min": 8.14066079328768e-05, "clip_ratio/region_mean": 0.0033513257221784443, "epoch": 2.713910761154856, "grad_norm": 0.10382639616727829, "learning_rate": 1e-06, "loss": -0.022, "step": 537 }, { "clip_ratio/high_max": 0.004053154756547883, "clip_ratio/high_mean": 0.0015738232650619466, "clip_ratio/low_mean": 0.0016331381993950345, "clip_ratio/low_min": 2.6686591809266247e-05, "clip_ratio/region_mean": 0.0032069614389911294, "epoch": 2.7185768445610963, "grad_norm": 0.09988241642713547, "learning_rate": 1e-06, "loss": -0.0233, "step": 538 }, { "clip_ratio/high_max": 0.00402184094855329, "clip_ratio/high_mean": 0.0016831040848046541, "clip_ratio/low_mean": 0.001825773790187668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003508877707645297, "epoch": 2.7232429279673376, "grad_norm": 0.10986864566802979, "learning_rate": 1e-06, "loss": 0.0061, "step": 539 }, { "clip_ratio/high_max": 0.004511283041210845, "clip_ratio/high_mean": 0.001832957030273974, "clip_ratio/low_mean": 0.0017417254821339156, "clip_ratio/low_min": 5.5598064136574976e-05, "clip_ratio/region_mean": 0.0035746825742535293, "epoch": 2.7279090113735784, "grad_norm": 0.10535531491041183, "learning_rate": 1e-06, "loss": -0.0156, "step": 540 }, { "clip_ratio/high_max": 0.00435531378025189, "clip_ratio/high_mean": 0.0017407431168976473, "clip_ratio/low_mean": 0.0016569660874665715, "clip_ratio/low_min": 0.00021277108680806123, "clip_ratio/region_mean": 0.0033977091588894837, "epoch": 2.7325750947798193, "grad_norm": 0.105378657579422, "learning_rate": 1e-06, "loss": -0.0205, "step": 541 }, { "clip_ratio/high_max": 0.003990085984696634, "clip_ratio/high_mean": 0.0015394604524772149, "clip_ratio/low_mean": 0.001715981215966167, "clip_ratio/low_min": 7.436196028720587e-05, "clip_ratio/region_mean": 0.0032554416538914666, "epoch": 2.73724117818606, "grad_norm": 0.09573499858379364, "learning_rate": 1e-06, "loss": 0.0144, "step": 542 }, { "clip_ratio/high_max": 0.004024940775707364, "clip_ratio/high_mean": 0.0016183969419216737, "clip_ratio/low_mean": 0.0019635228272818495, "clip_ratio/low_min": 0.00028928117535542697, "clip_ratio/region_mean": 0.0035819198164972477, "epoch": 2.741907261592301, "grad_norm": 0.10836490988731384, "learning_rate": 1e-06, "loss": 0.0122, "step": 543 }, { "clip_ratio/high_max": 0.004349221708253026, "clip_ratio/high_mean": 0.001780041384336073, "clip_ratio/low_mean": 0.001862231467384845, "clip_ratio/low_min": 9.7070508672914e-05, "clip_ratio/region_mean": 0.0036422728589968756, "epoch": 2.746573344998542, "grad_norm": 0.11187615245580673, "learning_rate": 1e-06, "loss": -0.0404, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0426199776785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 717.9034423828125, "completions/mean_terminated_length": 567.5196533203125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 2.7512394284047827, "grad_norm": 0.14116381108760834, "learning_rate": 1e-06, "loss": -0.0136, "num_tokens": 167471355.0, "reward": 0.6053990125656128, "reward_std": 0.18358783423900604, "rewards/simpleverify_reward/mean": 0.6053990125656128, "rewards/simpleverify_reward/std": 0.4887818992137909, "step": 545 }, { "clip_ratio/high_max": 0.0020968794779037125, "clip_ratio/high_mean": 0.0008818594124022638, "clip_ratio/low_mean": 0.0005725626460844069, "clip_ratio/low_min": 2.2285612431005575e-05, "clip_ratio/region_mean": 0.0014544220321113244, "epoch": 2.7559055118110236, "grad_norm": 0.13967737555503845, "learning_rate": 1e-06, "loss": -0.0144, "step": 546 }, { "clip_ratio/high_max": 0.0021594412537524477, "clip_ratio/high_mean": 0.0008386442477785749, "clip_ratio/low_mean": 0.0006798493777750991, "clip_ratio/low_min": 3.586220555007458e-05, "clip_ratio/region_mean": 0.0015184936492005363, "epoch": 2.7605715952172645, "grad_norm": 0.1388244926929474, "learning_rate": 1e-06, "loss": 0.0146, "step": 547 }, { "clip_ratio/high_max": 0.0021169140527490526, "clip_ratio/high_mean": 0.0009170684188575251, "clip_ratio/low_mean": 0.0006611868520849384, "clip_ratio/low_min": 6.811607181589352e-05, "clip_ratio/region_mean": 0.00157825525093358, "epoch": 2.7652376786235053, "grad_norm": 0.12524600327014923, "learning_rate": 1e-06, "loss": -0.0265, "step": 548 }, { "clip_ratio/high_max": 0.001675629973760806, "clip_ratio/high_mean": 0.0006925517864146968, "clip_ratio/low_mean": 0.0007028551681287354, "clip_ratio/low_min": 2.647488236107165e-05, "clip_ratio/region_mean": 0.0013954069509054534, "epoch": 2.769903762029746, "grad_norm": 0.13385963439941406, "learning_rate": 1e-06, "loss": 0.0006, "step": 549 }, { "clip_ratio/high_max": 0.0024384894568356685, "clip_ratio/high_mean": 0.0009183941892842995, "clip_ratio/low_mean": 0.000669166764964757, "clip_ratio/low_min": 1.547412648505997e-05, "clip_ratio/region_mean": 0.0015875609278737102, "epoch": 2.774569845435987, "grad_norm": 0.1454942524433136, "learning_rate": 1e-06, "loss": -0.0139, "step": 550 }, { "clip_ratio/high_max": 0.0026716771826613694, "clip_ratio/high_mean": 0.0010955473135254579, "clip_ratio/low_mean": 0.0009681672636361327, "clip_ratio/low_min": 8.554321993869962e-05, "clip_ratio/region_mean": 0.002063714615360368, "epoch": 2.779235928842228, "grad_norm": 0.13865989446640015, "learning_rate": 1e-06, "loss": 0.0056, "step": 551 }, { "clip_ratio/high_max": 0.0021705419858335517, "clip_ratio/high_mean": 0.0008571414418838685, "clip_ratio/low_mean": 0.0007973707952260156, "clip_ratio/low_min": 4.931644525640877e-05, "clip_ratio/region_mean": 0.0016545122125535272, "epoch": 2.783902012248469, "grad_norm": 0.12419628351926804, "learning_rate": 1e-06, "loss": -0.0065, "step": 552 }, { "clip_ratio/high_max": 0.002367948087339755, "clip_ratio/high_mean": 0.0009834785960265435, "clip_ratio/low_mean": 0.0009141218979493715, "clip_ratio/low_min": 9.0511259259074e-05, "clip_ratio/region_mean": 0.0018976005376316607, "epoch": 2.7885680956547096, "grad_norm": 0.13322414457798004, "learning_rate": 1e-06, "loss": 0.0248, "step": 553 }, { "clip_ratio/high_max": 0.0021278387284837663, "clip_ratio/high_mean": 0.0009322490532213124, "clip_ratio/low_mean": 0.0008498225506627932, "clip_ratio/low_min": 6.091130489949137e-05, "clip_ratio/region_mean": 0.0017820715947891586, "epoch": 2.793234179060951, "grad_norm": 0.13163484632968903, "learning_rate": 1e-06, "loss": 0.0048, "step": 554 }, { "clip_ratio/high_max": 0.0026034091933979653, "clip_ratio/high_mean": 0.001054678532455, "clip_ratio/low_mean": 0.0007656920643057674, "clip_ratio/low_min": 2.7412281269789673e-05, "clip_ratio/region_mean": 0.0018203706094936933, "epoch": 2.7979002624671914, "grad_norm": 0.1273442655801773, "learning_rate": 1e-06, "loss": -0.022, "step": 555 }, { "clip_ratio/high_max": 0.002598668186692521, "clip_ratio/high_mean": 0.001138136452937033, "clip_ratio/low_mean": 0.0007196783690233133, "clip_ratio/low_min": 2.2202486434252933e-05, "clip_ratio/region_mean": 0.0018578148301457986, "epoch": 2.8025663458734327, "grad_norm": 0.1400269716978073, "learning_rate": 1e-06, "loss": -0.0288, "step": 556 }, { "clip_ratio/high_max": 0.0021816730441059917, "clip_ratio/high_mean": 0.0008486886363243684, "clip_ratio/low_mean": 0.0009521437132207211, "clip_ratio/low_min": 6.399961603165139e-05, "clip_ratio/region_mean": 0.0018008323313551955, "epoch": 2.8072324292796735, "grad_norm": 0.12354227900505066, "learning_rate": 1e-06, "loss": 0.0202, "step": 557 }, { "clip_ratio/high_max": 0.0021771731480839662, "clip_ratio/high_mean": 0.001063119969330728, "clip_ratio/low_mean": 0.0008848187626426807, "clip_ratio/low_min": 4.150066524744034e-05, "clip_ratio/region_mean": 0.0019479387410683557, "epoch": 2.8118985126859144, "grad_norm": 0.1312863975763321, "learning_rate": 1e-06, "loss": -0.0616, "step": 558 }, { "clip_ratio/high_max": 0.0024527662608306855, "clip_ratio/high_mean": 0.001107616431909264, "clip_ratio/low_mean": 0.0009406776443938725, "clip_ratio/low_min": 5.859469729330158e-05, "clip_ratio/region_mean": 0.002048294038104359, "epoch": 2.8165645960921553, "grad_norm": 0.13248229026794434, "learning_rate": 1e-06, "loss": -0.0344, "step": 559 }, { "clip_ratio/high_max": 0.002234244191640755, "clip_ratio/high_mean": 0.0009262936237064423, "clip_ratio/low_mean": 0.0007471953795175068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016734890195948537, "epoch": 2.821230679498396, "grad_norm": 0.13744542002677917, "learning_rate": 1e-06, "loss": -0.0018, "step": 560 }, { "clip_ratio/high_max": 0.004274173661542591, "clip_ratio/high_mean": 0.0018019320341409184, "clip_ratio/low_mean": 0.0017173339365399443, "clip_ratio/low_min": 0.00020110520017624367, "clip_ratio/region_mean": 0.003519265985232778, "epoch": 2.825896762904637, "grad_norm": 0.1059526652097702, "learning_rate": 1e-06, "loss": -0.0145, "step": 561 }, { "clip_ratio/high_max": 0.004140589953749441, "clip_ratio/high_mean": 0.0016246105260506738, "clip_ratio/low_mean": 0.0016780312598712044, "clip_ratio/low_min": 0.00012833722303184913, "clip_ratio/region_mean": 0.0033026418095687404, "epoch": 2.830562846310878, "grad_norm": 0.10314527153968811, "learning_rate": 1e-06, "loss": -0.0153, "step": 562 }, { "clip_ratio/high_max": 0.003760451581911184, "clip_ratio/high_mean": 0.0016546626247873064, "clip_ratio/low_mean": 0.0018367113661952317, "clip_ratio/low_min": 0.0002478587775840424, "clip_ratio/region_mean": 0.003491373936412856, "epoch": 2.8352289297171187, "grad_norm": 0.11692745983600616, "learning_rate": 1e-06, "loss": 0.0136, "step": 563 }, { "clip_ratio/high_max": 0.003624541684985161, "clip_ratio/high_mean": 0.001824067108827876, "clip_ratio/low_mean": 0.001636762019188609, "clip_ratio/low_min": 0.00021367341287259478, "clip_ratio/region_mean": 0.003460829073446803, "epoch": 2.8398950131233596, "grad_norm": 0.10747841000556946, "learning_rate": 1e-06, "loss": -0.0273, "step": 564 }, { "clip_ratio/high_max": 0.0031877466826699674, "clip_ratio/high_mean": 0.0013683579563803505, "clip_ratio/low_mean": 0.0017441871532355435, "clip_ratio/low_min": 0.00013237440725788474, "clip_ratio/region_mean": 0.0031125450477702543, "epoch": 2.8445610965296004, "grad_norm": 0.09903515130281448, "learning_rate": 1e-06, "loss": -0.0002, "step": 565 }, { "clip_ratio/high_max": 0.00380195549223572, "clip_ratio/high_mean": 0.0015198203218460549, "clip_ratio/low_mean": 0.0015914174073259346, "clip_ratio/low_min": 2.9460286896210164e-05, "clip_ratio/region_mean": 0.003111237718258053, "epoch": 2.8492271799358413, "grad_norm": 0.10769470781087875, "learning_rate": 1e-06, "loss": -0.0147, "step": 566 }, { "clip_ratio/high_max": 0.004457996037672274, "clip_ratio/high_mean": 0.0017717695991450455, "clip_ratio/low_mean": 0.00208011174981948, "clip_ratio/low_min": 0.00032777949672890827, "clip_ratio/region_mean": 0.003851881338050589, "epoch": 2.853893263342082, "grad_norm": 0.11015890538692474, "learning_rate": 1e-06, "loss": 0.0048, "step": 567 }, { "clip_ratio/high_max": 0.004142918303841725, "clip_ratio/high_mean": 0.0017271442848141305, "clip_ratio/low_mean": 0.0015439957460330334, "clip_ratio/low_min": 0.00016872864944161847, "clip_ratio/region_mean": 0.00327114001265727, "epoch": 2.858559346748323, "grad_norm": 0.10232267528772354, "learning_rate": 1e-06, "loss": -0.0072, "step": 568 }, { "clip_ratio/high_max": 0.004255149920936674, "clip_ratio/high_mean": 0.0017199331705342047, "clip_ratio/low_mean": 0.001891954270831775, "clip_ratio/low_min": 0.0002598880819277838, "clip_ratio/region_mean": 0.0036118874413659796, "epoch": 2.863225430154564, "grad_norm": 0.10648967325687408, "learning_rate": 1e-06, "loss": 0.0239, "step": 569 }, { "clip_ratio/high_max": 0.0038735465932404622, "clip_ratio/high_mean": 0.001664111128775403, "clip_ratio/low_mean": 0.0018075093867082614, "clip_ratio/low_min": 0.0001073148669092916, "clip_ratio/region_mean": 0.0034716205555014312, "epoch": 2.8678915135608047, "grad_norm": 0.11051440984010696, "learning_rate": 1e-06, "loss": 0.0039, "step": 570 }, { "clip_ratio/high_max": 0.0039921955758472905, "clip_ratio/high_mean": 0.001765259828971466, "clip_ratio/low_mean": 0.0016162668107426725, "clip_ratio/low_min": 2.7412281269789673e-05, "clip_ratio/region_mean": 0.003381526650628075, "epoch": 2.872557596967046, "grad_norm": 0.10644158720970154, "learning_rate": 1e-06, "loss": -0.0228, "step": 571 }, { "clip_ratio/high_max": 0.004905263805994764, "clip_ratio/high_mean": 0.0019660901816678233, "clip_ratio/low_mean": 0.0016763871171860956, "clip_ratio/low_min": 0.00010509254934731871, "clip_ratio/region_mean": 0.0036424772697500885, "epoch": 2.8772236803732865, "grad_norm": 0.11353842914104462, "learning_rate": 1e-06, "loss": -0.0297, "step": 572 }, { "clip_ratio/high_max": 0.0035181549537810497, "clip_ratio/high_mean": 0.0014502295816782862, "clip_ratio/low_mean": 0.0019052155621466227, "clip_ratio/low_min": 0.00018920008369605057, "clip_ratio/region_mean": 0.003355445194756612, "epoch": 2.8818897637795278, "grad_norm": 0.1028515174984932, "learning_rate": 1e-06, "loss": 0.0194, "step": 573 }, { "clip_ratio/high_max": 0.004392321279738098, "clip_ratio/high_mean": 0.002010108612012118, "clip_ratio/low_mean": 0.0015710492880316451, "clip_ratio/low_min": 5.187583155930042e-05, "clip_ratio/region_mean": 0.0035811579728033394, "epoch": 2.886555847185768, "grad_norm": 0.1130344569683075, "learning_rate": 1e-06, "loss": -0.0624, "step": 574 }, { "clip_ratio/high_max": 0.00486399355577305, "clip_ratio/high_mean": 0.002024112342041917, "clip_ratio/low_mean": 0.0018441505380906165, "clip_ratio/low_min": 0.000149856223288225, "clip_ratio/region_mean": 0.003868263040203601, "epoch": 2.8912219305920095, "grad_norm": 0.11336081475019455, "learning_rate": 1e-06, "loss": -0.0352, "step": 575 }, { "clip_ratio/high_max": 0.003587546780181583, "clip_ratio/high_mean": 0.0014841021857137093, "clip_ratio/low_mean": 0.0015968015486578224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030809037198196165, "epoch": 2.8958880139982504, "grad_norm": 0.10892689228057861, "learning_rate": 1e-06, "loss": -0.0027, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0394810267857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 705.7078247070312, "completions/mean_terminated_length": 566.3536987304688, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.004666083406241, "grad_norm": 0.1297464817762375, "learning_rate": 1e-06, "loss": -0.029, "num_tokens": 176704134.0, "reward": 0.618722140789032, "reward_std": 0.16918261349201202, "rewards/simpleverify_reward/mean": 0.6187220811843872, "rewards/simpleverify_reward/std": 0.4857175350189209, "step": 577 }, { "clip_ratio/high_max": 0.0018989417585544288, "clip_ratio/high_mean": 0.000783735473305569, "clip_ratio/low_mean": 0.0005315109619914438, "clip_ratio/low_min": 4.201998035568977e-05, "clip_ratio/region_mean": 0.0013152464307495393, "epoch": 3.0093321668124817, "grad_norm": 0.13070416450500488, "learning_rate": 1e-06, "loss": 0.0218, "step": 578 }, { "clip_ratio/high_max": 0.0021109459921717644, "clip_ratio/high_mean": 0.0008489151005051099, "clip_ratio/low_mean": 0.0006225379947863985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014714530880155507, "epoch": 3.0139982502187226, "grad_norm": 0.12053510546684265, "learning_rate": 1e-06, "loss": -0.0079, "step": 579 }, { "clip_ratio/high_max": 0.0019092993134108838, "clip_ratio/high_mean": 0.000706828361217049, "clip_ratio/low_mean": 0.0006537435820064275, "clip_ratio/low_min": 2.4892449800972827e-05, "clip_ratio/region_mean": 0.0013605719541374128, "epoch": 3.0186643336249634, "grad_norm": 0.13016866147518158, "learning_rate": 1e-06, "loss": 0.005, "step": 580 }, { "clip_ratio/high_max": 0.0017881428502732888, "clip_ratio/high_mean": 0.000659828618154279, "clip_ratio/low_mean": 0.000642074968709494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013019035795878153, "epoch": 3.0233304170312043, "grad_norm": 0.136167511343956, "learning_rate": 1e-06, "loss": 0.0218, "step": 581 }, { "clip_ratio/high_max": 0.0018641004899109248, "clip_ratio/high_mean": 0.0008369101287826197, "clip_ratio/low_mean": 0.000613246332250128, "clip_ratio/low_min": 2.5461544282734394e-05, "clip_ratio/region_mean": 0.0014501564692182, "epoch": 3.027996500437445, "grad_norm": 0.13248851895332336, "learning_rate": 1e-06, "loss": -0.014, "step": 582 }, { "clip_ratio/high_max": 0.002190739138313802, "clip_ratio/high_mean": 0.0010033369308075635, "clip_ratio/low_mean": 0.0006626780996157322, "clip_ratio/low_min": 3.0676131245854776e-05, "clip_ratio/region_mean": 0.001666015014052391, "epoch": 3.032662583843686, "grad_norm": 0.13862930238246918, "learning_rate": 1e-06, "loss": -0.0561, "step": 583 }, { "clip_ratio/high_max": 0.002345991673792014, "clip_ratio/high_mean": 0.0008923112932279764, "clip_ratio/low_mean": 0.0008589484114054358, "clip_ratio/low_min": 8.714826981304213e-05, "clip_ratio/region_mean": 0.00175125971873058, "epoch": 3.037328667249927, "grad_norm": 0.15334631502628326, "learning_rate": 1e-06, "loss": -0.0111, "step": 584 }, { "clip_ratio/high_max": 0.0027672940341290087, "clip_ratio/high_mean": 0.0009380742576468037, "clip_ratio/low_mean": 0.0007088610309438081, "clip_ratio/low_min": 1.2254901776032057e-05, "clip_ratio/region_mean": 0.0016469352704007179, "epoch": 3.041994750656168, "grad_norm": 0.12957873940467834, "learning_rate": 1e-06, "loss": -0.0108, "step": 585 }, { "clip_ratio/high_max": 0.002369338530115783, "clip_ratio/high_mean": 0.000981785746262176, "clip_ratio/low_mean": 0.000874948367709294, "clip_ratio/low_min": 6.245668737392407e-05, "clip_ratio/region_mean": 0.0018567340957815759, "epoch": 3.046660834062409, "grad_norm": 0.1311190277338028, "learning_rate": 1e-06, "loss": 0.0056, "step": 586 }, { "clip_ratio/high_max": 0.0024873573711374775, "clip_ratio/high_mean": 0.0008692332012287807, "clip_ratio/low_mean": 0.0007939138668007217, "clip_ratio/low_min": 3.510250098770484e-05, "clip_ratio/region_mean": 0.0016631471080472693, "epoch": 3.05132691746865, "grad_norm": 0.13392719626426697, "learning_rate": 1e-06, "loss": -0.0227, "step": 587 }, { "clip_ratio/high_max": 0.0024968598809209652, "clip_ratio/high_mean": 0.000883779732248513, "clip_ratio/low_mean": 0.0008742020499994396, "clip_ratio/low_min": 0.0001033503267535707, "clip_ratio/region_mean": 0.0017579817795194685, "epoch": 3.055993000874891, "grad_norm": 0.14168329536914825, "learning_rate": 1e-06, "loss": 0.0495, "step": 588 }, { "clip_ratio/high_max": 0.0021550678720814176, "clip_ratio/high_mean": 0.000981088429398369, "clip_ratio/low_mean": 0.0007827279569028178, "clip_ratio/low_min": 3.882812688971171e-05, "clip_ratio/region_mean": 0.0017638164063100703, "epoch": 3.0606590842811316, "grad_norm": 0.12165799736976624, "learning_rate": 1e-06, "loss": -0.046, "step": 589 }, { "clip_ratio/high_max": 0.0021453929803101346, "clip_ratio/high_mean": 0.0009457574033149285, "clip_ratio/low_mean": 0.0008421002603427041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001787857705494389, "epoch": 3.0653251676873725, "grad_norm": 0.1449015587568283, "learning_rate": 1e-06, "loss": -0.0336, "step": 590 }, { "clip_ratio/high_max": 0.0020800196180061903, "clip_ratio/high_mean": 0.0009002483166113961, "clip_ratio/low_mean": 0.0010150725538551342, "clip_ratio/low_min": 8.30836297609494e-05, "clip_ratio/region_mean": 0.0019153208268107846, "epoch": 3.0699912510936134, "grad_norm": 0.1379612535238266, "learning_rate": 1e-06, "loss": -0.0001, "step": 591 }, { "clip_ratio/high_max": 0.0024064477256615646, "clip_ratio/high_mean": 0.0008938748833315913, "clip_ratio/low_mean": 0.000986194250799599, "clip_ratio/low_min": 1.27577059174655e-05, "clip_ratio/region_mean": 0.0018800691541400738, "epoch": 3.0746573344998542, "grad_norm": 0.12321005761623383, "learning_rate": 1e-06, "loss": -0.0018, "step": 592 }, { "clip_ratio/high_max": 0.003995479084551334, "clip_ratio/high_mean": 0.0016497129763592966, "clip_ratio/low_mean": 0.0013652391789946705, "clip_ratio/low_min": 4.703963713836856e-05, "clip_ratio/region_mean": 0.0030149521626299247, "epoch": 3.079323417906095, "grad_norm": 0.09836620837450027, "learning_rate": 1e-06, "loss": -0.0296, "step": 593 }, { "clip_ratio/high_max": 0.003406795542105101, "clip_ratio/high_mean": 0.0014184886276780162, "clip_ratio/low_mean": 0.0017059965448424919, "clip_ratio/low_min": 0.00016521043380635092, "clip_ratio/region_mean": 0.003124485163425561, "epoch": 3.083989501312336, "grad_norm": 0.10543031245470047, "learning_rate": 1e-06, "loss": 0.0209, "step": 594 }, { "clip_ratio/high_max": 0.0038427935942308977, "clip_ratio/high_mean": 0.001502671653724974, "clip_ratio/low_mean": 0.0016468072208226658, "clip_ratio/low_min": 3.391209975234233e-05, "clip_ratio/region_mean": 0.0031494788199779578, "epoch": 3.088655584718577, "grad_norm": 0.100646011531353, "learning_rate": 1e-06, "loss": -0.0087, "step": 595 }, { "clip_ratio/high_max": 0.0034824540598492604, "clip_ratio/high_mean": 0.0013763015685981372, "clip_ratio/low_mean": 0.0017292783086304553, "clip_ratio/low_min": 0.00014441743041970767, "clip_ratio/region_mean": 0.003105579802650027, "epoch": 3.0933216681248177, "grad_norm": 0.09727215021848679, "learning_rate": 1e-06, "loss": 0.0042, "step": 596 }, { "clip_ratio/high_max": 0.0036293155135354027, "clip_ratio/high_mean": 0.0013377864597714506, "clip_ratio/low_mean": 0.001752502575982362, "clip_ratio/low_min": 6.800896517233923e-05, "clip_ratio/region_mean": 0.0030902890430297703, "epoch": 3.0979877515310585, "grad_norm": 0.10720931738615036, "learning_rate": 1e-06, "loss": 0.0209, "step": 597 }, { "clip_ratio/high_max": 0.00363532191113336, "clip_ratio/high_mean": 0.0016436097466794308, "clip_ratio/low_mean": 0.0015543411936960183, "clip_ratio/low_min": 6.810110426158644e-05, "clip_ratio/region_mean": 0.003197950849425979, "epoch": 3.1026538349372994, "grad_norm": 0.1042325496673584, "learning_rate": 1e-06, "loss": -0.0149, "step": 598 }, { "clip_ratio/high_max": 0.004205332188575994, "clip_ratio/high_mean": 0.001850219203333836, "clip_ratio/low_mean": 0.0016532176814507693, "clip_ratio/low_min": 0.00016409991803811863, "clip_ratio/region_mean": 0.0035034368629567325, "epoch": 3.1073199183435403, "grad_norm": 0.1130886971950531, "learning_rate": 1e-06, "loss": -0.057, "step": 599 }, { "clip_ratio/high_max": 0.004173622306552716, "clip_ratio/high_mean": 0.00169884116621688, "clip_ratio/low_mean": 0.0019548626769392285, "clip_ratio/low_min": 0.00018240689678350464, "clip_ratio/region_mean": 0.003653703795862384, "epoch": 3.111986001749781, "grad_norm": 0.11387147754430771, "learning_rate": 1e-06, "loss": -0.0122, "step": 600 }, { "clip_ratio/high_max": 0.005090958613436669, "clip_ratio/high_mean": 0.001800794587325072, "clip_ratio/low_mean": 0.0015632430186087731, "clip_ratio/low_min": 6.127451342763379e-05, "clip_ratio/region_mean": 0.0033640376204857603, "epoch": 3.116652085156022, "grad_norm": 0.10405631363391876, "learning_rate": 1e-06, "loss": -0.0116, "step": 601 }, { "clip_ratio/high_max": 0.004095646385394502, "clip_ratio/high_mean": 0.001665675237745745, "clip_ratio/low_mean": 0.0021039583953097463, "clip_ratio/low_min": 0.00010477626165084075, "clip_ratio/region_mean": 0.0037696336294175126, "epoch": 3.121318168562263, "grad_norm": 0.10751927644014359, "learning_rate": 1e-06, "loss": 0.0048, "step": 602 }, { "clip_ratio/high_max": 0.003909106089849956, "clip_ratio/high_mean": 0.0016336360713467002, "clip_ratio/low_mean": 0.001738358405418694, "clip_ratio/low_min": 7.020500197540969e-05, "clip_ratio/region_mean": 0.003371994462213479, "epoch": 3.1259842519685037, "grad_norm": 0.1027611568570137, "learning_rate": 1e-06, "loss": -0.0236, "step": 603 }, { "clip_ratio/high_max": 0.00411385262850672, "clip_ratio/high_mean": 0.0016044451585912611, "clip_ratio/low_mean": 0.002101535770634655, "clip_ratio/low_min": 0.00020444583424250595, "clip_ratio/region_mean": 0.003705980983795598, "epoch": 3.130650335374745, "grad_norm": 0.1110607162117958, "learning_rate": 1e-06, "loss": 0.0486, "step": 604 }, { "clip_ratio/high_max": 0.004037739403429441, "clip_ratio/high_mean": 0.0017334331569145434, "clip_ratio/low_mean": 0.0014916374548192834, "clip_ratio/low_min": 0.00012129297465435229, "clip_ratio/region_mean": 0.003225070620828774, "epoch": 3.135316418780986, "grad_norm": 0.10643091797828674, "learning_rate": 1e-06, "loss": -0.0468, "step": 605 }, { "clip_ratio/high_max": 0.004383464198326692, "clip_ratio/high_mean": 0.001803305080102291, "clip_ratio/low_mean": 0.0015659096425224561, "clip_ratio/low_min": 1.4835034562565852e-05, "clip_ratio/region_mean": 0.0033692147189867683, "epoch": 3.1399825021872267, "grad_norm": 0.11684251576662064, "learning_rate": 1e-06, "loss": -0.0345, "step": 606 }, { "clip_ratio/high_max": 0.004195768618956208, "clip_ratio/high_mean": 0.0017371565772918984, "clip_ratio/low_mean": 0.001987223055039067, "clip_ratio/low_min": 0.0002798161658574827, "clip_ratio/region_mean": 0.003724379712366499, "epoch": 3.1446485855934676, "grad_norm": 0.11397506296634674, "learning_rate": 1e-06, "loss": -0.0011, "step": 607 }, { "clip_ratio/high_max": 0.004217505091219209, "clip_ratio/high_mean": 0.0016205803403863683, "clip_ratio/low_mean": 0.0016421983236796223, "clip_ratio/low_min": 6.37885314063169e-05, "clip_ratio/region_mean": 0.003262778678617906, "epoch": 3.1493146689997085, "grad_norm": 0.10415603220462799, "learning_rate": 1e-06, "loss": -0.0026, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0494559151785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 762.7545776367188, "completions/mean_terminated_length": 589.3289184570312, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 3.1539807524059493, "grad_norm": 0.13022832572460175, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 186201039.0, "reward": 0.5954939126968384, "reward_std": 0.1724170297384262, "rewards/simpleverify_reward/mean": 0.5954938530921936, "rewards/simpleverify_reward/std": 0.49081334471702576, "step": 609 }, { "clip_ratio/high_max": 0.0019000004795088898, "clip_ratio/high_mean": 0.0007194360860012239, "clip_ratio/low_mean": 0.0005228891932347324, "clip_ratio/low_min": 1.2891914593637921e-05, "clip_ratio/region_mean": 0.001242325291968882, "epoch": 3.15864683581219, "grad_norm": 0.11680217832326889, "learning_rate": 1e-06, "loss": -0.0179, "step": 610 }, { "clip_ratio/high_max": 0.0022567846390302293, "clip_ratio/high_mean": 0.0009244008051609853, "clip_ratio/low_mean": 0.0004673724870372098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001391773283103248, "epoch": 3.163312919218431, "grad_norm": 0.1458456665277481, "learning_rate": 1e-06, "loss": -0.0447, "step": 611 }, { "clip_ratio/high_max": 0.0016577017377130687, "clip_ratio/high_mean": 0.0008093183132587001, "clip_ratio/low_mean": 0.0005865902840014314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013959085808892269, "epoch": 3.167979002624672, "grad_norm": 0.1305447667837143, "learning_rate": 1e-06, "loss": -0.0085, "step": 612 }, { "clip_ratio/high_max": 0.0016875612163858023, "clip_ratio/high_mean": 0.0007692748695262708, "clip_ratio/low_mean": 0.0005504302171175368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013197050830058288, "epoch": 3.1726450860309128, "grad_norm": 0.13962143659591675, "learning_rate": 1e-06, "loss": -0.0421, "step": 613 }, { "clip_ratio/high_max": 0.002256116677017417, "clip_ratio/high_mean": 0.0007989624200490653, "clip_ratio/low_mean": 0.0006242389827093575, "clip_ratio/low_min": 3.251244652346941e-05, "clip_ratio/region_mean": 0.001423201410943875, "epoch": 3.1773111694371536, "grad_norm": 0.13130025565624237, "learning_rate": 1e-06, "loss": -0.0506, "step": 614 }, { "clip_ratio/high_max": 0.001926960190758109, "clip_ratio/high_mean": 0.0007885514232839341, "clip_ratio/low_mean": 0.0006558587447216269, "clip_ratio/low_min": 1.0154346455237828e-05, "clip_ratio/region_mean": 0.001444410183466971, "epoch": 3.1819772528433945, "grad_norm": 0.12116069346666336, "learning_rate": 1e-06, "loss": -0.0044, "step": 615 }, { "clip_ratio/high_max": 0.0018085795381921344, "clip_ratio/high_mean": 0.0007075285757309757, "clip_ratio/low_mean": 0.0007380584811471635, "clip_ratio/low_min": 5.704748764401302e-05, "clip_ratio/region_mean": 0.0014455870550591499, "epoch": 3.1866433362496354, "grad_norm": 0.12010941654443741, "learning_rate": 1e-06, "loss": 0.0089, "step": 616 }, { "clip_ratio/high_max": 0.002363851781410631, "clip_ratio/high_mean": 0.0008664549350214656, "clip_ratio/low_mean": 0.0009849494599620812, "clip_ratio/low_min": 8.205053927667905e-05, "clip_ratio/region_mean": 0.0018514044058974832, "epoch": 3.1913094196558762, "grad_norm": 0.1326657384634018, "learning_rate": 1e-06, "loss": 0.0028, "step": 617 }, { "clip_ratio/high_max": 0.002366474516747985, "clip_ratio/high_mean": 0.000962843279921799, "clip_ratio/low_mean": 0.0008817527968858485, "clip_ratio/low_min": 3.0019211408216506e-05, "clip_ratio/region_mean": 0.001844596081355121, "epoch": 3.195975503062117, "grad_norm": 0.141387939453125, "learning_rate": 1e-06, "loss": -0.0082, "step": 618 }, { "clip_ratio/high_max": 0.002450492946081795, "clip_ratio/high_mean": 0.0008913672099879477, "clip_ratio/low_mean": 0.0009695914650365012, "clip_ratio/low_min": 3.617877428041538e-05, "clip_ratio/region_mean": 0.0018609586695674807, "epoch": 3.200641586468358, "grad_norm": 0.12486181408166885, "learning_rate": 1e-06, "loss": -0.0077, "step": 619 }, { "clip_ratio/high_max": 0.002591315336758271, "clip_ratio/high_mean": 0.001013109886116581, "clip_ratio/low_mean": 0.0009311078229075065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019442177144810557, "epoch": 3.205307669874599, "grad_norm": 0.12602552771568298, "learning_rate": 1e-06, "loss": -0.0221, "step": 620 }, { "clip_ratio/high_max": 0.002283390895172488, "clip_ratio/high_mean": 0.000866814221808454, "clip_ratio/low_mean": 0.0008843828763929196, "clip_ratio/low_min": 6.866859803267289e-05, "clip_ratio/region_mean": 0.0017511970654595643, "epoch": 3.20997375328084, "grad_norm": 0.12433513253927231, "learning_rate": 1e-06, "loss": 0.0062, "step": 621 }, { "clip_ratio/high_max": 0.002088221055601025, "clip_ratio/high_mean": 0.0008685759203217458, "clip_ratio/low_mean": 0.0009978582966141403, "clip_ratio/low_min": 0.00010648265742929652, "clip_ratio/region_mean": 0.0018664341587282252, "epoch": 3.214639836687081, "grad_norm": 0.1304394006729126, "learning_rate": 1e-06, "loss": -0.0128, "step": 622 }, { "clip_ratio/high_max": 0.002259496206534095, "clip_ratio/high_mean": 0.000953817056142725, "clip_ratio/low_mean": 0.0008947519145294791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018485689797671512, "epoch": 3.219305920093322, "grad_norm": 0.12420185655355453, "learning_rate": 1e-06, "loss": 0.0366, "step": 623 }, { "clip_ratio/high_max": 0.002494355743692722, "clip_ratio/high_mean": 0.0009324685634055641, "clip_ratio/low_mean": 0.0010561314247752307, "clip_ratio/low_min": 3.200204810127616e-05, "clip_ratio/region_mean": 0.001988599957257975, "epoch": 3.2239720034995627, "grad_norm": 0.14329595863819122, "learning_rate": 1e-06, "loss": 0.0012, "step": 624 }, { "clip_ratio/high_max": 0.004006963696156163, "clip_ratio/high_mean": 0.001710728040052345, "clip_ratio/low_mean": 0.0016071704230853356, "clip_ratio/low_min": 0.0001188762835226953, "clip_ratio/region_mean": 0.0033178984813275747, "epoch": 3.2286380869058036, "grad_norm": 0.10261663049459457, "learning_rate": 1e-06, "loss": -0.0059, "step": 625 }, { "clip_ratio/high_max": 0.003251519039622508, "clip_ratio/high_mean": 0.001407215306244325, "clip_ratio/low_mean": 0.001471388230129378, "clip_ratio/low_min": 6.833876796008553e-05, "clip_ratio/region_mean": 0.0028786034672521055, "epoch": 3.2333041703120444, "grad_norm": 0.10264348238706589, "learning_rate": 1e-06, "loss": -0.0186, "step": 626 }, { "clip_ratio/high_max": 0.004369752772618085, "clip_ratio/high_mean": 0.001973792695935117, "clip_ratio/low_mean": 0.0014771976602787618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034509903198340908, "epoch": 3.2379702537182853, "grad_norm": 0.12080986052751541, "learning_rate": 1e-06, "loss": -0.0457, "step": 627 }, { "clip_ratio/high_max": 0.0036728047343785875, "clip_ratio/high_mean": 0.0016646357980789617, "clip_ratio/low_mean": 0.0016309115417243447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00329554732888937, "epoch": 3.242636337124526, "grad_norm": 0.10374809056520462, "learning_rate": 1e-06, "loss": -0.0093, "step": 628 }, { "clip_ratio/high_max": 0.004053356067743152, "clip_ratio/high_mean": 0.0017904860578710213, "clip_ratio/low_mean": 0.0015496700107178185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033401561086066067, "epoch": 3.247302420530767, "grad_norm": 0.11321540921926498, "learning_rate": 1e-06, "loss": -0.0431, "step": 629 }, { "clip_ratio/high_max": 0.004582806330290623, "clip_ratio/high_mean": 0.0016558061324758455, "clip_ratio/low_mean": 0.0014693157536385115, "clip_ratio/low_min": 0.00010000446127378382, "clip_ratio/region_mean": 0.003125121831544675, "epoch": 3.251968503937008, "grad_norm": 0.10362175107002258, "learning_rate": 1e-06, "loss": -0.0514, "step": 630 }, { "clip_ratio/high_max": 0.003786268789554015, "clip_ratio/high_mean": 0.0016751951952755917, "clip_ratio/low_mean": 0.001524067236459814, "clip_ratio/low_min": 3.046303754672408e-05, "clip_ratio/region_mean": 0.0031992623989935964, "epoch": 3.2566345873432487, "grad_norm": 0.09969361126422882, "learning_rate": 1e-06, "loss": -0.0051, "step": 631 }, { "clip_ratio/high_max": 0.0034105085287592374, "clip_ratio/high_mean": 0.0014426092493522447, "clip_ratio/low_mean": 0.0016897187815629877, "clip_ratio/low_min": 0.00019282766515971161, "clip_ratio/region_mean": 0.003132328056381084, "epoch": 3.2613006707494896, "grad_norm": 0.09795522689819336, "learning_rate": 1e-06, "loss": 0.0082, "step": 632 }, { "clip_ratio/high_max": 0.004529601872491185, "clip_ratio/high_mean": 0.0015576269252051134, "clip_ratio/low_mean": 0.0020816438482142985, "clip_ratio/low_min": 0.00024160535394912586, "clip_ratio/region_mean": 0.0036392707261256874, "epoch": 3.2659667541557305, "grad_norm": 0.10518714785575867, "learning_rate": 1e-06, "loss": 0.002, "step": 633 }, { "clip_ratio/high_max": 0.004199200586299412, "clip_ratio/high_mean": 0.0017250252967642155, "clip_ratio/low_mean": 0.0018690196302486584, "clip_ratio/low_min": 7.511227704526391e-05, "clip_ratio/region_mean": 0.003594044828787446, "epoch": 3.2706328375619713, "grad_norm": 0.10764548182487488, "learning_rate": 1e-06, "loss": -0.0091, "step": 634 }, { "clip_ratio/high_max": 0.00408559955394594, "clip_ratio/high_mean": 0.001564082154800417, "clip_ratio/low_mean": 0.0018179331382270902, "clip_ratio/low_min": 8.483331384923076e-05, "clip_ratio/region_mean": 0.003382015318493359, "epoch": 3.275298920968212, "grad_norm": 0.10013151168823242, "learning_rate": 1e-06, "loss": -0.0085, "step": 635 }, { "clip_ratio/high_max": 0.004208264799672179, "clip_ratio/high_mean": 0.0016985590955300722, "clip_ratio/low_mean": 0.0018666411215235712, "clip_ratio/low_min": 8.387854177271947e-05, "clip_ratio/region_mean": 0.00356520019704476, "epoch": 3.279965004374453, "grad_norm": 0.1021655797958374, "learning_rate": 1e-06, "loss": -0.0229, "step": 636 }, { "clip_ratio/high_max": 0.0040453202309436165, "clip_ratio/high_mean": 0.001501298051152844, "clip_ratio/low_mean": 0.0018029722741630394, "clip_ratio/low_min": 8.979059930425137e-05, "clip_ratio/region_mean": 0.003304270372609608, "epoch": 3.284631087780694, "grad_norm": 0.09687436372041702, "learning_rate": 1e-06, "loss": 0.0054, "step": 637 }, { "clip_ratio/high_max": 0.004028237104648724, "clip_ratio/high_mean": 0.0016927702417888213, "clip_ratio/low_mean": 0.0019515422736731125, "clip_ratio/low_min": 0.0002449123130645603, "clip_ratio/region_mean": 0.003644312593678478, "epoch": 3.289297171186935, "grad_norm": 0.10302521288394928, "learning_rate": 1e-06, "loss": -0.0136, "step": 638 }, { "clip_ratio/high_max": 0.0036676239251391962, "clip_ratio/high_mean": 0.0015250520373228937, "clip_ratio/low_mean": 0.0016797626303741708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032048146094894037, "epoch": 3.2939632545931756, "grad_norm": 0.09853555262088776, "learning_rate": 1e-06, "loss": 0.0359, "step": 639 }, { "clip_ratio/high_max": 0.004594740836182609, "clip_ratio/high_mean": 0.0017249968768737745, "clip_ratio/low_mean": 0.0019127596024191007, "clip_ratio/low_min": 3.200204810127616e-05, "clip_ratio/region_mean": 0.00363775640289532, "epoch": 3.298629337999417, "grad_norm": 0.11317408084869385, "learning_rate": 1e-06, "loss": 0.0003, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0474330357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 726.4277954101562, "completions/mean_terminated_length": 558.6400146484375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 3.303295421405658, "grad_norm": 0.14014358818531036, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 195299795.0, "reward": 0.61767578125, "reward_std": 0.17741146683692932, "rewards/simpleverify_reward/mean": 0.61767578125, "rewards/simpleverify_reward/std": 0.4859721064567566, "step": 641 }, { "clip_ratio/high_max": 0.002066903663944686, "clip_ratio/high_mean": 0.000848774168844102, "clip_ratio/low_mean": 0.0005408977258412051, "clip_ratio/low_min": 1.4602804185415152e-05, "clip_ratio/region_mean": 0.0013896718883188441, "epoch": 3.3079615048118987, "grad_norm": 0.13495449721813202, "learning_rate": 1e-06, "loss": -0.0119, "step": 642 }, { "clip_ratio/high_max": 0.002042150324996328, "clip_ratio/high_mean": 0.0008760671298659872, "clip_ratio/low_mean": 0.0005293992844599416, "clip_ratio/low_min": 1.200076803797856e-05, "clip_ratio/region_mean": 0.0014054664097784553, "epoch": 3.3126275882181395, "grad_norm": 0.1311677098274231, "learning_rate": 1e-06, "loss": 0.0224, "step": 643 }, { "clip_ratio/high_max": 0.0022365580007317476, "clip_ratio/high_mean": 0.00092781108651252, "clip_ratio/low_mean": 0.0004887322784270509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014165434149617795, "epoch": 3.3172936716243804, "grad_norm": 0.15040592849254608, "learning_rate": 1e-06, "loss": -0.0069, "step": 644 }, { "clip_ratio/high_max": 0.002318853665201459, "clip_ratio/high_mean": 0.0009354324811283732, "clip_ratio/low_mean": 0.0006117621805969975, "clip_ratio/low_min": 1.4272664884629194e-05, "clip_ratio/region_mean": 0.0015471946680918336, "epoch": 3.3219597550306212, "grad_norm": 0.15013548731803894, "learning_rate": 1e-06, "loss": -0.0075, "step": 645 }, { "clip_ratio/high_max": 0.0018589166793390177, "clip_ratio/high_mean": 0.0008079504314082442, "clip_ratio/low_mean": 0.0007287500648089917, "clip_ratio/low_min": 8.046938546613092e-05, "clip_ratio/region_mean": 0.0015367004998552147, "epoch": 3.326625838436862, "grad_norm": 0.13903145492076874, "learning_rate": 1e-06, "loss": 0.0002, "step": 646 }, { "clip_ratio/high_max": 0.0021955265001452062, "clip_ratio/high_mean": 0.0008579009081586264, "clip_ratio/low_mean": 0.0008154346633091336, "clip_ratio/low_min": 7.345266931224614e-05, "clip_ratio/region_mean": 0.0016733355587348342, "epoch": 3.331291921843103, "grad_norm": 0.1499493271112442, "learning_rate": 1e-06, "loss": 0.0529, "step": 647 }, { "clip_ratio/high_max": 0.0022960445130593143, "clip_ratio/high_mean": 0.000984575308393687, "clip_ratio/low_mean": 0.0006591099700017367, "clip_ratio/low_min": 1.3141295312379953e-05, "clip_ratio/region_mean": 0.00164368528930936, "epoch": 3.335958005249344, "grad_norm": 0.13286639750003815, "learning_rate": 1e-06, "loss": -0.0664, "step": 648 }, { "clip_ratio/high_max": 0.0024783850894891657, "clip_ratio/high_mean": 0.0010142752071260475, "clip_ratio/low_mean": 0.0009281232760258717, "clip_ratio/low_min": 6.058152575860731e-05, "clip_ratio/region_mean": 0.0019423984558670782, "epoch": 3.3406240886555847, "grad_norm": 0.1306142956018448, "learning_rate": 1e-06, "loss": 0.0138, "step": 649 }, { "clip_ratio/high_max": 0.0021204129516263492, "clip_ratio/high_mean": 0.0010155941927223466, "clip_ratio/low_mean": 0.0008665206842124462, "clip_ratio/low_min": 0.00011700736376951681, "clip_ratio/region_mean": 0.0018821148696588352, "epoch": 3.3452901720618256, "grad_norm": 0.13773073256015778, "learning_rate": 1e-06, "loss": -0.0292, "step": 650 }, { "clip_ratio/high_max": 0.002631053372169845, "clip_ratio/high_mean": 0.0011039887067454401, "clip_ratio/low_mean": 0.0009044942235050257, "clip_ratio/low_min": 8.548718324163929e-05, "clip_ratio/region_mean": 0.0020084829593542963, "epoch": 3.3499562554680664, "grad_norm": 0.1380700320005417, "learning_rate": 1e-06, "loss": -0.0206, "step": 651 }, { "clip_ratio/high_max": 0.0021688921624445356, "clip_ratio/high_mean": 0.000924098196264822, "clip_ratio/low_mean": 0.0010092142838402651, "clip_ratio/low_min": 4.021879067295231e-05, "clip_ratio/region_mean": 0.0019333124801050872, "epoch": 3.3546223388743073, "grad_norm": 0.1590724140405655, "learning_rate": 1e-06, "loss": 0.004, "step": 652 }, { "clip_ratio/high_max": 0.0027883377697435208, "clip_ratio/high_mean": 0.0011420621449360624, "clip_ratio/low_mean": 0.0009596440140740015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002101706115354318, "epoch": 3.359288422280548, "grad_norm": 0.146029531955719, "learning_rate": 1e-06, "loss": 0.0183, "step": 653 }, { "clip_ratio/high_max": 0.002291004937433172, "clip_ratio/high_mean": 0.0009532289805065375, "clip_ratio/low_mean": 0.0009386378169438103, "clip_ratio/low_min": 4.9195223255082965e-05, "clip_ratio/region_mean": 0.0018918667992693372, "epoch": 3.363954505686789, "grad_norm": 0.147115558385849, "learning_rate": 1e-06, "loss": -0.0084, "step": 654 }, { "clip_ratio/high_max": 0.00254554464481771, "clip_ratio/high_mean": 0.0010649877858668333, "clip_ratio/low_mean": 0.0009034327740664594, "clip_ratio/low_min": 1.7780939742806368e-05, "clip_ratio/region_mean": 0.0019684206054080278, "epoch": 3.36862058909303, "grad_norm": 0.14575709402561188, "learning_rate": 1e-06, "loss": -0.0317, "step": 655 }, { "clip_ratio/high_max": 0.0022353297972586006, "clip_ratio/high_mean": 0.0009694115087768296, "clip_ratio/low_mean": 0.0010076715643663192, "clip_ratio/low_min": 4.455798625713214e-05, "clip_ratio/region_mean": 0.0019770830549532548, "epoch": 3.3732866724992707, "grad_norm": 0.1381767988204956, "learning_rate": 1e-06, "loss": 0.0039, "step": 656 }, { "clip_ratio/high_max": 0.005044901394285262, "clip_ratio/high_mean": 0.0020702473557321355, "clip_ratio/low_mean": 0.0015968887137205456, "clip_ratio/low_min": 5.5203670854098164e-05, "clip_ratio/region_mean": 0.0036671360285254195, "epoch": 3.377952755905512, "grad_norm": 0.10805666446685791, "learning_rate": 1e-06, "loss": -0.0401, "step": 657 }, { "clip_ratio/high_max": 0.0041514738331898116, "clip_ratio/high_mean": 0.001632462299312465, "clip_ratio/low_mean": 0.0019125303733744659, "clip_ratio/low_min": 0.00019378033903194591, "clip_ratio/region_mean": 0.003544992723618634, "epoch": 3.382618839311753, "grad_norm": 0.11008191853761673, "learning_rate": 1e-06, "loss": -0.0127, "step": 658 }, { "clip_ratio/high_max": 0.003644034164608456, "clip_ratio/high_mean": 0.0014865095872664824, "clip_ratio/low_mean": 0.001745428398862714, "clip_ratio/low_min": 6.600791130040307e-05, "clip_ratio/region_mean": 0.0032319379679393023, "epoch": 3.3872849227179938, "grad_norm": 0.10289491713047028, "learning_rate": 1e-06, "loss": 0.0216, "step": 659 }, { "clip_ratio/high_max": 0.004477594993659295, "clip_ratio/high_mean": 0.0016738822268962394, "clip_ratio/low_mean": 0.001609667502634693, "clip_ratio/low_min": 0.00015565632565994747, "clip_ratio/region_mean": 0.0032835496531333774, "epoch": 3.3919510061242346, "grad_norm": 0.10740148276090622, "learning_rate": 1e-06, "loss": -0.0078, "step": 660 }, { "clip_ratio/high_max": 0.004334258119342849, "clip_ratio/high_mean": 0.0018854044683394022, "clip_ratio/low_mean": 0.0018751603001874173, "clip_ratio/low_min": 0.0002083036470139632, "clip_ratio/region_mean": 0.0037605647230520844, "epoch": 3.3966170895304755, "grad_norm": 0.11443742364645004, "learning_rate": 1e-06, "loss": -0.0084, "step": 661 }, { "clip_ratio/high_max": 0.00394822473754175, "clip_ratio/high_mean": 0.001523849739896832, "clip_ratio/low_mean": 0.002100512880133465, "clip_ratio/low_min": 0.00021312579519872088, "clip_ratio/region_mean": 0.0036243626673240215, "epoch": 3.4012831729367163, "grad_norm": 0.11889758706092834, "learning_rate": 1e-06, "loss": -0.0008, "step": 662 }, { "clip_ratio/high_max": 0.004420377197675407, "clip_ratio/high_mean": 0.0016236180381383747, "clip_ratio/low_mean": 0.0022603662764595356, "clip_ratio/low_min": 0.0002510506074031582, "clip_ratio/region_mean": 0.0038839843473397195, "epoch": 3.405949256342957, "grad_norm": 0.12119041383266449, "learning_rate": 1e-06, "loss": 0.0518, "step": 663 }, { "clip_ratio/high_max": 0.0045833216136088595, "clip_ratio/high_mean": 0.0019681698868225794, "clip_ratio/low_mean": 0.0016429882125521544, "clip_ratio/low_min": 3.5765546272159554e-05, "clip_ratio/region_mean": 0.0036111581575823948, "epoch": 3.410615339749198, "grad_norm": 0.1092284768819809, "learning_rate": 1e-06, "loss": -0.0673, "step": 664 }, { "clip_ratio/high_max": 0.003909898172423709, "clip_ratio/high_mean": 0.0016008672064344864, "clip_ratio/low_mean": 0.001996095677895937, "clip_ratio/low_min": 0.00024429691620753147, "clip_ratio/region_mean": 0.0035969629389001057, "epoch": 3.415281423155439, "grad_norm": 0.11005459725856781, "learning_rate": 1e-06, "loss": 0.013, "step": 665 }, { "clip_ratio/high_max": 0.003609065810451284, "clip_ratio/high_mean": 0.0018118260086339433, "clip_ratio/low_mean": 0.001869842908490682, "clip_ratio/low_min": 0.00023208119455375709, "clip_ratio/region_mean": 0.003681668938952498, "epoch": 3.41994750656168, "grad_norm": 0.11277961730957031, "learning_rate": 1e-06, "loss": -0.0301, "step": 666 }, { "clip_ratio/high_max": 0.00498825368413236, "clip_ratio/high_mean": 0.0020805437670787796, "clip_ratio/low_mean": 0.001959514291229425, "clip_ratio/low_min": 0.00035709681105799973, "clip_ratio/region_mean": 0.0040400580619461834, "epoch": 3.4246135899679206, "grad_norm": 0.11262824386358261, "learning_rate": 1e-06, "loss": -0.0216, "step": 667 }, { "clip_ratio/high_max": 0.004037264792714268, "clip_ratio/high_mean": 0.0016997149759845342, "clip_ratio/low_mean": 0.0018412630597595125, "clip_ratio/low_min": 0.00029850217470084317, "clip_ratio/region_mean": 0.0035409779811743647, "epoch": 3.4292796733741615, "grad_norm": 0.10545282810926437, "learning_rate": 1e-06, "loss": 0.0031, "step": 668 }, { "clip_ratio/high_max": 0.0048037907254183665, "clip_ratio/high_mean": 0.0020303584351495374, "clip_ratio/low_mean": 0.0019416738614381757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003972032252931967, "epoch": 3.4339457567804024, "grad_norm": 0.10566996037960052, "learning_rate": 1e-06, "loss": 0.0174, "step": 669 }, { "clip_ratio/high_max": 0.004213619977235794, "clip_ratio/high_mean": 0.001741656604281161, "clip_ratio/low_mean": 0.0019449365645414218, "clip_ratio/low_min": 0.00013560805746237747, "clip_ratio/region_mean": 0.003686593263410032, "epoch": 3.4386118401866432, "grad_norm": 0.12119334936141968, "learning_rate": 1e-06, "loss": -0.0095, "step": 670 }, { "clip_ratio/high_max": 0.004940413469739724, "clip_ratio/high_mean": 0.0019349591239006259, "clip_ratio/low_mean": 0.001774188542185584, "clip_ratio/low_min": 0.0001243501501448918, "clip_ratio/region_mean": 0.003709147684276104, "epoch": 3.443277923592884, "grad_norm": 0.11867986619472504, "learning_rate": 1e-06, "loss": -0.0327, "step": 671 }, { "clip_ratio/high_max": 0.004274389721103944, "clip_ratio/high_mean": 0.0015988001032383181, "clip_ratio/low_mean": 0.0018123163608834147, "clip_ratio/low_min": 0.0001417928287992254, "clip_ratio/region_mean": 0.003411116515053436, "epoch": 3.447944006999125, "grad_norm": 0.1104457825422287, "learning_rate": 1e-06, "loss": 0.003, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.051967075892857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 735.8509521484375, "completions/mean_terminated_length": 551.6620483398438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.452610090405366, "grad_norm": 0.17222283780574799, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 204250578.0, "reward": 0.6261160969734192, "reward_std": 0.18874570727348328, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48385027050971985, "step": 673 }, { "clip_ratio/high_max": 0.001980857821763493, "clip_ratio/high_mean": 0.0008756244969845284, "clip_ratio/low_mean": 0.0006200690604600823, "clip_ratio/low_min": 4.0948902096715756e-05, "clip_ratio/region_mean": 0.0014956935483496636, "epoch": 3.457276173811607, "grad_norm": 0.16398774087429047, "learning_rate": 1e-06, "loss": -0.0162, "step": 674 }, { "clip_ratio/high_max": 0.0025196804563165642, "clip_ratio/high_mean": 0.0009564410756865982, "clip_ratio/low_mean": 0.0005882041787117487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015446452744072303, "epoch": 3.4619422572178475, "grad_norm": 0.1514146476984024, "learning_rate": 1e-06, "loss": -0.0373, "step": 675 }, { "clip_ratio/high_max": 0.0019539371314749587, "clip_ratio/high_mean": 0.0008573754857934546, "clip_ratio/low_mean": 0.0007221938067232259, "clip_ratio/low_min": 1.3278096048452426e-05, "clip_ratio/region_mean": 0.0015795693107065745, "epoch": 3.466608340624089, "grad_norm": 0.13135387003421783, "learning_rate": 1e-06, "loss": -0.0114, "step": 676 }, { "clip_ratio/high_max": 0.0029063174151815474, "clip_ratio/high_mean": 0.001123106456361711, "clip_ratio/low_mean": 0.0007890469278208911, "clip_ratio/low_min": 7.141552760003833e-05, "clip_ratio/region_mean": 0.0019121534278383479, "epoch": 3.4712744240303297, "grad_norm": 0.16005004942417145, "learning_rate": 1e-06, "loss": -0.005, "step": 677 }, { "clip_ratio/high_max": 0.002180146570026409, "clip_ratio/high_mean": 0.0009474923335801577, "clip_ratio/low_mean": 0.0008806268087937497, "clip_ratio/low_min": 4.369026646600105e-05, "clip_ratio/region_mean": 0.0018281191150890663, "epoch": 3.4759405074365706, "grad_norm": 0.14573457837104797, "learning_rate": 1e-06, "loss": -0.0089, "step": 678 }, { "clip_ratio/high_max": 0.002764007069345098, "clip_ratio/high_mean": 0.0010274022351950407, "clip_ratio/low_mean": 0.0008278212535515195, "clip_ratio/low_min": 9.704004332888871e-05, "clip_ratio/region_mean": 0.0018552234905655496, "epoch": 3.4806065908428114, "grad_norm": 0.1608944535255432, "learning_rate": 1e-06, "loss": -0.0167, "step": 679 }, { "clip_ratio/high_max": 0.0024787839938653633, "clip_ratio/high_mean": 0.0011237841135880444, "clip_ratio/low_mean": 0.0006414969884644961, "clip_ratio/low_min": 1.306438116444042e-05, "clip_ratio/region_mean": 0.0017652811075095087, "epoch": 3.4852726742490523, "grad_norm": 0.14125318825244904, "learning_rate": 1e-06, "loss": -0.0583, "step": 680 }, { "clip_ratio/high_max": 0.0020170112620689906, "clip_ratio/high_mean": 0.0008679190195834963, "clip_ratio/low_mean": 0.0009032525049406104, "clip_ratio/low_min": 3.626025045377901e-05, "clip_ratio/region_mean": 0.001771171526343096, "epoch": 3.489938757655293, "grad_norm": 0.15285366773605347, "learning_rate": 1e-06, "loss": 0.0259, "step": 681 }, { "clip_ratio/high_max": 0.0027065124959335662, "clip_ratio/high_mean": 0.001050167626090115, "clip_ratio/low_mean": 0.0007910495078249369, "clip_ratio/low_min": 3.331556581542827e-05, "clip_ratio/region_mean": 0.0018412171375530306, "epoch": 3.494604841061534, "grad_norm": 0.14905956387519836, "learning_rate": 1e-06, "loss": -0.0171, "step": 682 }, { "clip_ratio/high_max": 0.0028013944829581305, "clip_ratio/high_mean": 0.0011154164167237468, "clip_ratio/low_mean": 0.0009514720532024512, "clip_ratio/low_min": 6.775842302886304e-05, "clip_ratio/region_mean": 0.0020668884317274205, "epoch": 3.499270924467775, "grad_norm": 0.1458147019147873, "learning_rate": 1e-06, "loss": -0.0258, "step": 683 }, { "clip_ratio/high_max": 0.0025980886930483393, "clip_ratio/high_mean": 0.0012200003511679824, "clip_ratio/low_mean": 0.0010374091580160893, "clip_ratio/low_min": 5.84668205192429e-05, "clip_ratio/region_mean": 0.0022574094764422625, "epoch": 3.5039370078740157, "grad_norm": 0.13864310085773468, "learning_rate": 1e-06, "loss": -0.0204, "step": 684 }, { "clip_ratio/high_max": 0.002224741372629069, "clip_ratio/high_mean": 0.000967970525380224, "clip_ratio/low_mean": 0.0009146253260041703, "clip_ratio/low_min": 5.269441317068413e-05, "clip_ratio/region_mean": 0.0018825958541128784, "epoch": 3.5086030912802566, "grad_norm": 0.14988622069358826, "learning_rate": 1e-06, "loss": 0.0141, "step": 685 }, { "clip_ratio/high_max": 0.002817836204485502, "clip_ratio/high_mean": 0.0011961388008785434, "clip_ratio/low_mean": 0.0011763399579649558, "clip_ratio/low_min": 0.00013196072632126743, "clip_ratio/region_mean": 0.0023724787679384463, "epoch": 3.5132691746864975, "grad_norm": 0.14847752451896667, "learning_rate": 1e-06, "loss": -0.0221, "step": 686 }, { "clip_ratio/high_max": 0.0022620249874307774, "clip_ratio/high_mean": 0.00096858550386969, "clip_ratio/low_mean": 0.0007860835721658077, "clip_ratio/low_min": 2.2119978893897496e-05, "clip_ratio/region_mean": 0.0017546690651215613, "epoch": 3.5179352580927383, "grad_norm": 0.1235848218202591, "learning_rate": 1e-06, "loss": -0.0494, "step": 687 }, { "clip_ratio/high_max": 0.0031135671088122763, "clip_ratio/high_mean": 0.0011754685547202826, "clip_ratio/low_mean": 0.0009287907469115453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021042592998128384, "epoch": 3.522601341498979, "grad_norm": 0.13883580267429352, "learning_rate": 1e-06, "loss": -0.0316, "step": 688 }, { "clip_ratio/high_max": 0.005382393283070996, "clip_ratio/high_mean": 0.0020826052932534367, "clip_ratio/low_mean": 0.0018338317822781391, "clip_ratio/low_min": 0.00018301716954738367, "clip_ratio/region_mean": 0.0039164370828075334, "epoch": 3.52726742490522, "grad_norm": 0.11869249492883682, "learning_rate": 1e-06, "loss": -0.0125, "step": 689 }, { "clip_ratio/high_max": 0.0048578141577309, "clip_ratio/high_mean": 0.002021719170443248, "clip_ratio/low_mean": 0.002312681608600542, "clip_ratio/low_min": 6.89756761858007e-05, "clip_ratio/region_mean": 0.004334400757215917, "epoch": 3.531933508311461, "grad_norm": 0.1230340451002121, "learning_rate": 1e-06, "loss": -0.0174, "step": 690 }, { "clip_ratio/high_max": 0.005167898998479359, "clip_ratio/high_mean": 0.0019759668575716205, "clip_ratio/low_mean": 0.0020079830937902443, "clip_ratio/low_min": 5.4811773225083016e-05, "clip_ratio/region_mean": 0.003983949951361865, "epoch": 3.536599591717702, "grad_norm": 0.11668127030134201, "learning_rate": 1e-06, "loss": -0.0384, "step": 691 }, { "clip_ratio/high_max": 0.004001097360742278, "clip_ratio/high_mean": 0.0017204966279678047, "clip_ratio/low_mean": 0.002034656143223401, "clip_ratio/low_min": 0.00013382639008341357, "clip_ratio/region_mean": 0.0037551527348114178, "epoch": 3.5412656751239426, "grad_norm": 0.10663443803787231, "learning_rate": 1e-06, "loss": -0.0123, "step": 692 }, { "clip_ratio/high_max": 0.004988473068806343, "clip_ratio/high_mean": 0.002163104945793748, "clip_ratio/low_mean": 0.0022495699595310725, "clip_ratio/low_min": 0.00022599931253353134, "clip_ratio/region_mean": 0.004412674781633541, "epoch": 3.545931758530184, "grad_norm": 14.693273544311523, "learning_rate": 1e-06, "loss": -0.0056, "step": 693 }, { "clip_ratio/high_max": 0.00434461300028488, "clip_ratio/high_mean": 0.0017873449978651479, "clip_ratio/low_mean": 0.002183643708121963, "clip_ratio/low_min": 0.0001980489178095013, "clip_ratio/region_mean": 0.003970988735090941, "epoch": 3.5505978419364244, "grad_norm": 0.11578813195228577, "learning_rate": 1e-06, "loss": -0.0098, "step": 694 }, { "clip_ratio/high_max": 0.005354479304514825, "clip_ratio/high_mean": 0.0020086369186174124, "clip_ratio/low_mean": 0.002013766898016911, "clip_ratio/low_min": 0.00017393503731000237, "clip_ratio/region_mean": 0.004022403809358366, "epoch": 3.5552639253426657, "grad_norm": 0.1182750016450882, "learning_rate": 1e-06, "loss": -0.0178, "step": 695 }, { "clip_ratio/high_max": 0.005097582499729469, "clip_ratio/high_mean": 0.0022468328097602352, "clip_ratio/low_mean": 0.001675467807217501, "clip_ratio/low_min": 1.628028076083865e-05, "clip_ratio/region_mean": 0.003922300529666245, "epoch": 3.5599300087489065, "grad_norm": 0.161624476313591, "learning_rate": 1e-06, "loss": -0.059, "step": 696 }, { "clip_ratio/high_max": 0.003914291868568398, "clip_ratio/high_mean": 0.0017340208778477972, "clip_ratio/low_mean": 0.002251035850349581, "clip_ratio/low_min": 0.00014183842358761467, "clip_ratio/region_mean": 0.00398505678458605, "epoch": 3.5645960921551474, "grad_norm": 0.11544261127710342, "learning_rate": 1e-06, "loss": 0.0249, "step": 697 }, { "clip_ratio/high_max": 0.00518498920428101, "clip_ratio/high_mean": 0.0020181063227937557, "clip_ratio/low_mean": 0.0020307432969275396, "clip_ratio/low_min": 0.00014992004435043782, "clip_ratio/region_mean": 0.004048849550599698, "epoch": 3.5692621755613883, "grad_norm": 0.1165173128247261, "learning_rate": 1e-06, "loss": -0.0181, "step": 698 }, { "clip_ratio/high_max": 0.004927886839141138, "clip_ratio/high_mean": 0.00218375600525178, "clip_ratio/low_mean": 0.0022608401050092652, "clip_ratio/low_min": 0.00014047945114725735, "clip_ratio/region_mean": 0.004444596197572537, "epoch": 3.573928258967629, "grad_norm": 0.11463657766580582, "learning_rate": 1e-06, "loss": -0.0268, "step": 699 }, { "clip_ratio/high_max": 0.005064605502411723, "clip_ratio/high_mean": 0.0022345846955431625, "clip_ratio/low_mean": 0.002261064939375501, "clip_ratio/low_min": 0.00012846695790358353, "clip_ratio/region_mean": 0.00449564955488313, "epoch": 3.57859434237387, "grad_norm": 0.12003596127033234, "learning_rate": 1e-06, "loss": -0.0212, "step": 700 }, { "clip_ratio/high_max": 0.00493295572232455, "clip_ratio/high_mean": 0.002002900408115238, "clip_ratio/low_mean": 0.0022291371333267307, "clip_ratio/low_min": 5.5010803407640196e-05, "clip_ratio/region_mean": 0.004232037594192661, "epoch": 3.583260425780111, "grad_norm": 0.1145007386803627, "learning_rate": 1e-06, "loss": 0.013, "step": 701 }, { "clip_ratio/high_max": 0.005295188268064521, "clip_ratio/high_mean": 0.0022089182639319915, "clip_ratio/low_mean": 0.0024476660655636806, "clip_ratio/low_min": 0.00035798074532067403, "clip_ratio/region_mean": 0.004656584424083121, "epoch": 3.5879265091863517, "grad_norm": 0.11989594250917435, "learning_rate": 1e-06, "loss": -0.0231, "step": 702 }, { "clip_ratio/high_max": 0.004397689423058182, "clip_ratio/high_mean": 0.0018595462242956273, "clip_ratio/low_mean": 0.0016044462136051152, "clip_ratio/low_min": 7.313091009564232e-05, "clip_ratio/region_mean": 0.003463992426986806, "epoch": 3.5925925925925926, "grad_norm": 0.11015208065509796, "learning_rate": 1e-06, "loss": -0.0501, "step": 703 }, { "clip_ratio/high_max": 0.00532678839226719, "clip_ratio/high_mean": 0.002088845882099122, "clip_ratio/low_mean": 0.002012509139603935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004101354934391566, "epoch": 3.5972586759988334, "grad_norm": 0.11353772133588791, "learning_rate": 1e-06, "loss": -0.0325, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049037388392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 732.408935546875, "completions/mean_terminated_length": 558.9617919921875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.6019247594050743, "grad_norm": 0.13796812295913696, "learning_rate": 1e-06, "loss": -0.0226, "num_tokens": 213268688.0, "reward": 0.6263253688812256, "reward_std": 0.17682276666164398, "rewards/simpleverify_reward/mean": 0.6263253092765808, "rewards/simpleverify_reward/std": 0.4837956726551056, "step": 705 }, { "clip_ratio/high_max": 0.001707718682155246, "clip_ratio/high_mean": 0.0007608414143760456, "clip_ratio/low_mean": 0.0005688443379767705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013296857287059538, "epoch": 3.606590842811315, "grad_norm": 0.13988927006721497, "learning_rate": 1e-06, "loss": -0.043, "step": 706 }, { "clip_ratio/high_max": 0.0017335513039142825, "clip_ratio/high_mean": 0.0006721417103108251, "clip_ratio/low_mean": 0.0006290203837124864, "clip_ratio/low_min": 2.7062134904554114e-05, "clip_ratio/region_mean": 0.0013011620903853327, "epoch": 3.611256926217556, "grad_norm": 0.1380269080400467, "learning_rate": 1e-06, "loss": 0.0052, "step": 707 }, { "clip_ratio/high_max": 0.0017275933714699931, "clip_ratio/high_mean": 0.0008268210658570752, "clip_ratio/low_mean": 0.0006764011086488608, "clip_ratio/low_min": 1.4912908227415755e-05, "clip_ratio/region_mean": 0.0015032221417641267, "epoch": 3.615923009623797, "grad_norm": 0.13663262128829956, "learning_rate": 1e-06, "loss": -0.0011, "step": 708 }, { "clip_ratio/high_max": 0.0021695724135497585, "clip_ratio/high_mean": 0.000924284162465483, "clip_ratio/low_mean": 0.000647056125671952, "clip_ratio/low_min": 1.771792994986754e-05, "clip_ratio/region_mean": 0.0015713403045083396, "epoch": 3.6205890930300377, "grad_norm": 0.1439342051744461, "learning_rate": 1e-06, "loss": 0.0254, "step": 709 }, { "clip_ratio/high_max": 0.001976675044716103, "clip_ratio/high_mean": 0.0008985688309621764, "clip_ratio/low_mean": 0.0006537300268973922, "clip_ratio/low_min": 2.824326656991616e-05, "clip_ratio/region_mean": 0.0015522988360316958, "epoch": 3.625255176436279, "grad_norm": 0.1437269002199173, "learning_rate": 1e-06, "loss": -0.0648, "step": 710 }, { "clip_ratio/high_max": 0.0021579306994681247, "clip_ratio/high_mean": 0.0008526164310751483, "clip_ratio/low_mean": 0.0006334783101920038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001486094726715237, "epoch": 3.6299212598425195, "grad_norm": 0.1405710130929947, "learning_rate": 1e-06, "loss": 0.0011, "step": 711 }, { "clip_ratio/high_max": 0.001778824458597228, "clip_ratio/high_mean": 0.0007698830795561662, "clip_ratio/low_mean": 0.0007373526987066725, "clip_ratio/low_min": 3.947056393371895e-05, "clip_ratio/region_mean": 0.0015072357964527328, "epoch": 3.6345873432487608, "grad_norm": 0.1362738460302353, "learning_rate": 1e-06, "loss": 0.0148, "step": 712 }, { "clip_ratio/high_max": 0.0019354814721737057, "clip_ratio/high_mean": 0.000855296657391591, "clip_ratio/low_mean": 0.0007056312952045118, "clip_ratio/low_min": 3.554165596142411e-05, "clip_ratio/region_mean": 0.001560927979880944, "epoch": 3.6392534266550016, "grad_norm": 0.13625776767730713, "learning_rate": 1e-06, "loss": -0.0527, "step": 713 }, { "clip_ratio/high_max": 0.0020584117446560413, "clip_ratio/high_mean": 0.0008291475460282527, "clip_ratio/low_mean": 0.0008382462865483831, "clip_ratio/low_min": 2.7179821699974127e-05, "clip_ratio/region_mean": 0.0016673938189342152, "epoch": 3.6439195100612425, "grad_norm": 0.1302632838487625, "learning_rate": 1e-06, "loss": 0.0012, "step": 714 }, { "clip_ratio/high_max": 0.0022871950786793604, "clip_ratio/high_mean": 0.0009641977485443931, "clip_ratio/low_mean": 0.0009316934483649675, "clip_ratio/low_min": 3.747002483578399e-05, "clip_ratio/region_mean": 0.0018958911459776573, "epoch": 3.6485855934674833, "grad_norm": 0.1516473889350891, "learning_rate": 1e-06, "loss": -0.0079, "step": 715 }, { "clip_ratio/high_max": 0.002037832535279449, "clip_ratio/high_mean": 0.0008557994042348582, "clip_ratio/low_mean": 0.0008380950330320047, "clip_ratio/low_min": 1.6267569662886672e-05, "clip_ratio/region_mean": 0.00169389444636181, "epoch": 3.653251676873724, "grad_norm": 0.14155632257461548, "learning_rate": 1e-06, "loss": -0.0375, "step": 716 }, { "clip_ratio/high_max": 0.0021008499279560056, "clip_ratio/high_mean": 0.0009644745805417188, "clip_ratio/low_mean": 0.0008633770030428423, "clip_ratio/low_min": 2.870044409064576e-05, "clip_ratio/region_mean": 0.0018278515781275928, "epoch": 3.657917760279965, "grad_norm": 0.14588196575641632, "learning_rate": 1e-06, "loss": -0.0756, "step": 717 }, { "clip_ratio/high_max": 0.0020923038537148386, "clip_ratio/high_mean": 0.0009377236256113974, "clip_ratio/low_mean": 0.0008512311378581217, "clip_ratio/low_min": 7.207609451143071e-05, "clip_ratio/region_mean": 0.0017889547816594131, "epoch": 3.662583843686206, "grad_norm": 0.12750868499279022, "learning_rate": 1e-06, "loss": -0.0528, "step": 718 }, { "clip_ratio/high_max": 0.0023973630159161985, "clip_ratio/high_mean": 0.0010299165951437317, "clip_ratio/low_mean": 0.0011046579948015278, "clip_ratio/low_min": 6.793599095544778e-05, "clip_ratio/region_mean": 0.0021345745408325456, "epoch": 3.667249927092447, "grad_norm": 0.15035173296928406, "learning_rate": 1e-06, "loss": 0.0367, "step": 719 }, { "clip_ratio/high_max": 0.002176510948629584, "clip_ratio/high_mean": 0.0009333652178611374, "clip_ratio/low_mean": 0.0010895014584093587, "clip_ratio/low_min": 4.154180714976974e-05, "clip_ratio/region_mean": 0.002022866661718581, "epoch": 3.6719160104986877, "grad_norm": 0.14976878464221954, "learning_rate": 1e-06, "loss": 0.0137, "step": 720 }, { "clip_ratio/high_max": 0.0043633645691443235, "clip_ratio/high_mean": 0.0016300785264320439, "clip_ratio/low_mean": 0.0014974916593928356, "clip_ratio/low_min": 7.270823698490858e-05, "clip_ratio/region_mean": 0.003127570220385678, "epoch": 3.6765820939049285, "grad_norm": 0.09291525930166245, "learning_rate": 1e-06, "loss": -0.0233, "step": 721 }, { "clip_ratio/high_max": 0.004633081814972684, "clip_ratio/high_mean": 0.0018503991814213805, "clip_ratio/low_mean": 0.0015906114531389903, "clip_ratio/low_min": 0.00016880440580280265, "clip_ratio/region_mean": 0.003441010703681968, "epoch": 3.6812481773111694, "grad_norm": 0.10746114701032639, "learning_rate": 1e-06, "loss": -0.0439, "step": 722 }, { "clip_ratio/high_max": 0.0035965180722996593, "clip_ratio/high_mean": 0.0014306397024483886, "clip_ratio/low_mean": 0.0018339150337851606, "clip_ratio/low_min": 8.446964966424275e-05, "clip_ratio/region_mean": 0.003264554761699401, "epoch": 3.6859142607174102, "grad_norm": 0.1101069375872612, "learning_rate": 1e-06, "loss": 0.0043, "step": 723 }, { "clip_ratio/high_max": 0.003934079228201881, "clip_ratio/high_mean": 0.001699183452728903, "clip_ratio/low_mean": 0.0018036453984677792, "clip_ratio/low_min": 0.0001386386156809749, "clip_ratio/region_mean": 0.0035028288402827457, "epoch": 3.690580344123651, "grad_norm": 0.10536739230155945, "learning_rate": 1e-06, "loss": -0.002, "step": 724 }, { "clip_ratio/high_max": 0.004065997738507576, "clip_ratio/high_mean": 0.001666085132455919, "clip_ratio/low_mean": 0.0019013668497791514, "clip_ratio/low_min": 0.0002288434116053395, "clip_ratio/region_mean": 0.0035674519749591127, "epoch": 3.695246427529892, "grad_norm": 0.11310018599033356, "learning_rate": 1e-06, "loss": 0.0245, "step": 725 }, { "clip_ratio/high_max": 0.0047646881866967306, "clip_ratio/high_mean": 0.002020320054725744, "clip_ratio/low_mean": 0.0017728058555803727, "clip_ratio/low_min": 9.492444587522186e-05, "clip_ratio/region_mean": 0.0037931259139440954, "epoch": 3.699912510936133, "grad_norm": 0.11736725270748138, "learning_rate": 1e-06, "loss": -0.0658, "step": 726 }, { "clip_ratio/high_max": 0.004748114384710789, "clip_ratio/high_mean": 0.001793054208974354, "clip_ratio/low_mean": 0.0017506387230241671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003543693048413843, "epoch": 3.704578594342374, "grad_norm": 0.11295180022716522, "learning_rate": 1e-06, "loss": 0.0002, "step": 727 }, { "clip_ratio/high_max": 0.003740929299965501, "clip_ratio/high_mean": 0.0014832897886662977, "clip_ratio/low_mean": 0.001872063810878899, "clip_ratio/low_min": 0.00022408315999200568, "clip_ratio/region_mean": 0.0033553535904502496, "epoch": 3.7092446777486145, "grad_norm": 0.10654005408287048, "learning_rate": 1e-06, "loss": 0.0139, "step": 728 }, { "clip_ratio/high_max": 0.004244787895004265, "clip_ratio/high_mean": 0.001821457306505181, "clip_ratio/low_mean": 0.0015946164749038871, "clip_ratio/low_min": 6.794709042878821e-05, "clip_ratio/region_mean": 0.0034160738141508773, "epoch": 3.713910761154856, "grad_norm": 0.11029591411352158, "learning_rate": 1e-06, "loss": -0.0535, "step": 729 }, { "clip_ratio/high_max": 0.003686222968099173, "clip_ratio/high_mean": 0.0015619132755091414, "clip_ratio/low_mean": 0.001800961108529009, "clip_ratio/low_min": 6.676781413261779e-05, "clip_ratio/region_mean": 0.003362874369486235, "epoch": 3.7185768445610963, "grad_norm": 0.10085614770650864, "learning_rate": 1e-06, "loss": 0.0003, "step": 730 }, { "clip_ratio/high_max": 0.004310803866246715, "clip_ratio/high_mean": 0.0018141360524168704, "clip_ratio/low_mean": 0.0019254810213169549, "clip_ratio/low_min": 7.618234667461365e-05, "clip_ratio/region_mean": 0.0037396170082502067, "epoch": 3.7232429279673376, "grad_norm": 0.11083915084600449, "learning_rate": 1e-06, "loss": -0.0089, "step": 731 }, { "clip_ratio/high_max": 0.004351902585767675, "clip_ratio/high_mean": 0.001663338793150615, "clip_ratio/low_mean": 0.0016601150891801808, "clip_ratio/low_min": 2.6349072868470103e-05, "clip_ratio/region_mean": 0.003323453784105368, "epoch": 3.7279090113735784, "grad_norm": 0.10564175993204117, "learning_rate": 1e-06, "loss": -0.0384, "step": 732 }, { "clip_ratio/high_max": 0.004283978501916863, "clip_ratio/high_mean": 0.0019504584197420627, "clip_ratio/low_mean": 0.001666402011323953, "clip_ratio/low_min": 6.592826684936881e-05, "clip_ratio/region_mean": 0.0036168604128761217, "epoch": 3.7325750947798193, "grad_norm": 0.11122928559780121, "learning_rate": 1e-06, "loss": -0.0765, "step": 733 }, { "clip_ratio/high_max": 0.004652982868719846, "clip_ratio/high_mean": 0.001839220931287855, "clip_ratio/low_mean": 0.00168227751419181, "clip_ratio/low_min": 0.00013934968228568323, "clip_ratio/region_mean": 0.0035214984964113683, "epoch": 3.73724117818606, "grad_norm": 0.10405309498310089, "learning_rate": 1e-06, "loss": -0.0536, "step": 734 }, { "clip_ratio/high_max": 0.004151810164330527, "clip_ratio/high_mean": 0.0017772249411791563, "clip_ratio/low_mean": 0.002403065747785149, "clip_ratio/low_min": 0.0002291443834110396, "clip_ratio/region_mean": 0.004180290721706115, "epoch": 3.741907261592301, "grad_norm": 0.11087476462125778, "learning_rate": 1e-06, "loss": 0.0357, "step": 735 }, { "clip_ratio/high_max": 0.003940795955713838, "clip_ratio/high_mean": 0.0016709093397366814, "clip_ratio/low_mean": 0.0021931754090473987, "clip_ratio/low_min": 0.00013855856741429307, "clip_ratio/region_mean": 0.0038640848215436563, "epoch": 3.746573344998542, "grad_norm": 0.17536933720111847, "learning_rate": 1e-06, "loss": 0.0127, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0526646205357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 750.3714599609375, "completions/mean_terminated_length": 564.3800659179688, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 3.7512394284047827, "grad_norm": 0.17477920651435852, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 222321741.0, "reward": 0.6285575032234192, "reward_std": 0.17536064982414246, "rewards/simpleverify_reward/mean": 0.6285575032234192, "rewards/simpleverify_reward/std": 0.48320725560188293, "step": 737 }, { "clip_ratio/high_max": 0.002359038648137357, "clip_ratio/high_mean": 0.0008411856597376755, "clip_ratio/low_mean": 0.0006273077415244188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00146849341399502, "epoch": 3.7559055118110236, "grad_norm": 2.43017578125, "learning_rate": 1e-06, "loss": -0.0092, "step": 738 }, { "clip_ratio/high_max": 0.00226938795458409, "clip_ratio/high_mean": 0.0008588581622461788, "clip_ratio/low_mean": 0.0005710975647161831, "clip_ratio/low_min": 1.537893695058301e-05, "clip_ratio/region_mean": 0.0014299557333288249, "epoch": 3.7605715952172645, "grad_norm": 0.14628635346889496, "learning_rate": 1e-06, "loss": -0.0438, "step": 739 }, { "clip_ratio/high_max": 0.002076726992527256, "clip_ratio/high_mean": 0.0008326972883878625, "clip_ratio/low_mean": 0.000561808639758965, "clip_ratio/low_min": 1.3464024050335865e-05, "clip_ratio/region_mean": 0.0013945059217803646, "epoch": 3.7652376786235053, "grad_norm": 0.14165563881397247, "learning_rate": 1e-06, "loss": -0.0115, "step": 740 }, { "clip_ratio/high_max": 0.001913825712108519, "clip_ratio/high_mean": 0.0007372847721853759, "clip_ratio/low_mean": 0.0006441420955525246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013814268313581124, "epoch": 3.769903762029746, "grad_norm": 0.13239187002182007, "learning_rate": 1e-06, "loss": -0.0009, "step": 741 }, { "clip_ratio/high_max": 0.002100650694046635, "clip_ratio/high_mean": 0.0007973564133862965, "clip_ratio/low_mean": 0.000842191906485823, "clip_ratio/low_min": 1.2804753168893512e-05, "clip_ratio/region_mean": 0.0016395483107771724, "epoch": 3.774569845435987, "grad_norm": 0.12319058179855347, "learning_rate": 1e-06, "loss": -0.0042, "step": 742 }, { "clip_ratio/high_max": 0.0018320144154131413, "clip_ratio/high_mean": 0.0008133291703416035, "clip_ratio/low_mean": 0.0009640983225835953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017774275038391352, "epoch": 3.779235928842228, "grad_norm": 0.140071302652359, "learning_rate": 1e-06, "loss": 0.0448, "step": 743 }, { "clip_ratio/high_max": 0.002301309519680217, "clip_ratio/high_mean": 0.0010237305596092483, "clip_ratio/low_mean": 0.0007424470386467874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017661775927990675, "epoch": 3.783902012248469, "grad_norm": 0.16917061805725098, "learning_rate": 1e-06, "loss": -0.0396, "step": 744 }, { "clip_ratio/high_max": 0.0018802807844622293, "clip_ratio/high_mean": 0.0007775138351462374, "clip_ratio/low_mean": 0.0008676103370817145, "clip_ratio/low_min": 4.0618530874780845e-05, "clip_ratio/region_mean": 0.0016451241608592682, "epoch": 3.7885680956547096, "grad_norm": 0.13434794545173645, "learning_rate": 1e-06, "loss": 0.0187, "step": 745 }, { "clip_ratio/high_max": 0.0022772336305934004, "clip_ratio/high_mean": 0.000921388669667067, "clip_ratio/low_mean": 0.0008603563092037803, "clip_ratio/low_min": 1.625487675482873e-05, "clip_ratio/region_mean": 0.0017817449843278155, "epoch": 3.793234179060951, "grad_norm": 0.13847526907920837, "learning_rate": 1e-06, "loss": -0.0228, "step": 746 }, { "clip_ratio/high_max": 0.002215390544733964, "clip_ratio/high_mean": 0.0009477393650740851, "clip_ratio/low_mean": 0.0010245876692351885, "clip_ratio/low_min": 1.4060742614674382e-05, "clip_ratio/region_mean": 0.0019723270306712948, "epoch": 3.7979002624671914, "grad_norm": 0.15017037093639374, "learning_rate": 1e-06, "loss": 0.0196, "step": 747 }, { "clip_ratio/high_max": 0.002312032134796027, "clip_ratio/high_mean": 0.0009924015939759556, "clip_ratio/low_mean": 0.0009686341563792666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001961035763088148, "epoch": 3.8025663458734327, "grad_norm": 0.13074450194835663, "learning_rate": 1e-06, "loss": -0.0257, "step": 748 }, { "clip_ratio/high_max": 0.0021165127254789695, "clip_ratio/high_mean": 0.0008259641408585594, "clip_ratio/low_mean": 0.0009743193113536108, "clip_ratio/low_min": 3.6923986044712365e-05, "clip_ratio/region_mean": 0.0018002834476646967, "epoch": 3.8072324292796735, "grad_norm": 0.13358743488788605, "learning_rate": 1e-06, "loss": 0.0086, "step": 749 }, { "clip_ratio/high_max": 0.0030714675958734006, "clip_ratio/high_mean": 0.0011656206497718813, "clip_ratio/low_mean": 0.0007731245386821683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019387452557566576, "epoch": 3.8118985126859144, "grad_norm": 0.12120359390974045, "learning_rate": 1e-06, "loss": -0.0747, "step": 750 }, { "clip_ratio/high_max": 0.002324391760339495, "clip_ratio/high_mean": 0.0009623090481909458, "clip_ratio/low_mean": 0.0009358588449686067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018981679022544995, "epoch": 3.8165645960921553, "grad_norm": 0.13132305443286896, "learning_rate": 1e-06, "loss": -0.0451, "step": 751 }, { "clip_ratio/high_max": 0.002670203997695353, "clip_ratio/high_mean": 0.001091261779947672, "clip_ratio/low_mean": 0.0009214749770762865, "clip_ratio/low_min": 3.470266346994322e-05, "clip_ratio/region_mean": 0.002012736767937895, "epoch": 3.821230679498396, "grad_norm": 0.12459760904312134, "learning_rate": 1e-06, "loss": -0.0283, "step": 752 }, { "clip_ratio/high_max": 0.005750593772972934, "clip_ratio/high_mean": 0.002223786694230512, "clip_ratio/low_mean": 0.0017182409756060224, "clip_ratio/low_min": 9.561421393300407e-05, "clip_ratio/region_mean": 0.003942027629818767, "epoch": 3.825896762904637, "grad_norm": 0.11602294445037842, "learning_rate": 1e-06, "loss": -0.0657, "step": 753 }, { "clip_ratio/high_max": 0.003535622003255412, "clip_ratio/high_mean": 0.0013698389630008023, "clip_ratio/low_mean": 0.0014545201884175185, "clip_ratio/low_min": 4.64082168036839e-05, "clip_ratio/region_mean": 0.0028243591805221513, "epoch": 3.830562846310878, "grad_norm": 0.13256803154945374, "learning_rate": 1e-06, "loss": -0.0097, "step": 754 }, { "clip_ratio/high_max": 0.004794902604771778, "clip_ratio/high_mean": 0.0018382197958999313, "clip_ratio/low_mean": 0.0016228220520133618, "clip_ratio/low_min": 4.613681085174903e-05, "clip_ratio/region_mean": 0.0034610418369993567, "epoch": 3.8352289297171187, "grad_norm": 0.11848633736371994, "learning_rate": 1e-06, "loss": -0.0447, "step": 755 }, { "clip_ratio/high_max": 0.004067458568897564, "clip_ratio/high_mean": 0.0017436861216992838, "clip_ratio/low_mean": 0.0018153616292693187, "clip_ratio/low_min": 0.00011876183998538181, "clip_ratio/region_mean": 0.0035590476327342913, "epoch": 3.8398950131233596, "grad_norm": 0.11235970258712769, "learning_rate": 1e-06, "loss": -0.0123, "step": 756 }, { "clip_ratio/high_max": 0.0035849043051712215, "clip_ratio/high_mean": 0.0015424811754201073, "clip_ratio/low_mean": 0.0017083663333323784, "clip_ratio/low_min": 0.00012292051178519614, "clip_ratio/region_mean": 0.0032508474978385493, "epoch": 3.8445610965296004, "grad_norm": 0.11216001212596893, "learning_rate": 1e-06, "loss": -0.0017, "step": 757 }, { "clip_ratio/high_max": 0.0038531647078343667, "clip_ratio/high_mean": 0.0014799288255744614, "clip_ratio/low_mean": 0.001976608797122026, "clip_ratio/low_min": 5.913903260079678e-05, "clip_ratio/region_mean": 0.0034565376408863813, "epoch": 3.8492271799358413, "grad_norm": 0.10212117433547974, "learning_rate": 1e-06, "loss": -0.0048, "step": 758 }, { "clip_ratio/high_max": 0.004134741029702127, "clip_ratio/high_mean": 0.0014983863366069272, "clip_ratio/low_mean": 0.0022383331670425832, "clip_ratio/low_min": 9.652051085140556e-05, "clip_ratio/region_mean": 0.0037367193726822734, "epoch": 3.853893263342082, "grad_norm": 0.1095103770494461, "learning_rate": 1e-06, "loss": 0.0439, "step": 759 }, { "clip_ratio/high_max": 0.004754321649670601, "clip_ratio/high_mean": 0.001988091695238836, "clip_ratio/low_mean": 0.0017386630606779363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037267547741066664, "epoch": 3.858559346748323, "grad_norm": 0.11243551969528198, "learning_rate": 1e-06, "loss": -0.0406, "step": 760 }, { "clip_ratio/high_max": 0.004078614703757921, "clip_ratio/high_mean": 0.0015099863321665907, "clip_ratio/low_mean": 0.0018661021022126079, "clip_ratio/low_min": 5.624297045869753e-05, "clip_ratio/region_mean": 0.0033760884834919125, "epoch": 3.863225430154564, "grad_norm": 0.10717935115098953, "learning_rate": 1e-06, "loss": 0.0179, "step": 761 }, { "clip_ratio/high_max": 0.004397791883093305, "clip_ratio/high_mean": 0.0017535864826641046, "clip_ratio/low_mean": 0.0017320508231932763, "clip_ratio/low_min": 0.00013003901403862983, "clip_ratio/region_mean": 0.003485637265839614, "epoch": 3.8678915135608047, "grad_norm": 0.1085558608174324, "learning_rate": 1e-06, "loss": -0.0237, "step": 762 }, { "clip_ratio/high_max": 0.004115596319024917, "clip_ratio/high_mean": 0.0016645620198687539, "clip_ratio/low_mean": 0.002103723309119232, "clip_ratio/low_min": 1.449107367079705e-05, "clip_ratio/region_mean": 0.003768285343539901, "epoch": 3.872557596967046, "grad_norm": 0.11942525953054428, "learning_rate": 1e-06, "loss": 0.0187, "step": 763 }, { "clip_ratio/high_max": 0.00424246299371589, "clip_ratio/high_mean": 0.0017170476203318685, "clip_ratio/low_mean": 0.001973378617549315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036904262378811836, "epoch": 3.8772236803732865, "grad_norm": 0.10570453852415085, "learning_rate": 1e-06, "loss": -0.0265, "step": 764 }, { "clip_ratio/high_max": 0.0040037437283899635, "clip_ratio/high_mean": 0.0015547644688922446, "clip_ratio/low_mean": 0.001978310403501382, "clip_ratio/low_min": 7.850350084481761e-05, "clip_ratio/region_mean": 0.003533074908773415, "epoch": 3.8818897637795278, "grad_norm": 0.1083567664027214, "learning_rate": 1e-06, "loss": 0.0077, "step": 765 }, { "clip_ratio/high_max": 0.0046975701552582905, "clip_ratio/high_mean": 0.001908313533931505, "clip_ratio/low_mean": 0.0014650588782387786, "clip_ratio/low_min": 3.4473247069399804e-05, "clip_ratio/region_mean": 0.003373372441274114, "epoch": 3.886555847185768, "grad_norm": 0.09803938865661621, "learning_rate": 1e-06, "loss": -0.0754, "step": 766 }, { "clip_ratio/high_max": 0.004197909656795673, "clip_ratio/high_mean": 0.0016492087124788668, "clip_ratio/low_mean": 0.0016601090392214246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003309317777166143, "epoch": 3.8912219305920095, "grad_norm": 0.10391347110271454, "learning_rate": 1e-06, "loss": -0.0459, "step": 767 }, { "clip_ratio/high_max": 0.004114445735467598, "clip_ratio/high_mean": 0.0017251825993298553, "clip_ratio/low_mean": 0.0017964808685064781, "clip_ratio/low_min": 4.848132266488392e-05, "clip_ratio/region_mean": 0.003521663456922397, "epoch": 3.8958880139982504, "grad_norm": 0.10053832828998566, "learning_rate": 1e-06, "loss": -0.029, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05224609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 751.1327514648438, "completions/mean_terminated_length": 566.7428588867188, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 4.004666083406241, "grad_norm": 0.15310677886009216, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 231437548.0, "reward": 0.6320452094078064, "reward_std": 0.1672099530696869, "rewards/simpleverify_reward/mean": 0.6320452094078064, "rewards/simpleverify_reward/std": 0.48226580023765564, "step": 769 }, { "clip_ratio/high_max": 0.0019281686836620793, "clip_ratio/high_mean": 0.000810307843494229, "clip_ratio/low_mean": 0.0005624724508379586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013727803016081452, "epoch": 4.009332166812482, "grad_norm": 0.14683762192726135, "learning_rate": 1e-06, "loss": -0.0043, "step": 770 }, { "clip_ratio/high_max": 0.002079153142403811, "clip_ratio/high_mean": 0.0009315488823631313, "clip_ratio/low_mean": 0.000509585847339622, "clip_ratio/low_min": 1.4357913642015774e-05, "clip_ratio/region_mean": 0.0014411347001441754, "epoch": 4.013998250218723, "grad_norm": 0.166998952627182, "learning_rate": 1e-06, "loss": -0.0529, "step": 771 }, { "clip_ratio/high_max": 0.0019399192751734518, "clip_ratio/high_mean": 0.0008342950713995378, "clip_ratio/low_mean": 0.0004934847083859495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001327779762505088, "epoch": 4.0186643336249634, "grad_norm": 0.13920444250106812, "learning_rate": 1e-06, "loss": -0.0777, "step": 772 }, { "clip_ratio/high_max": 0.002094589752232423, "clip_ratio/high_mean": 0.0007418531313305721, "clip_ratio/low_mean": 0.0005927519014221616, "clip_ratio/low_min": 1.3443751413433347e-05, "clip_ratio/region_mean": 0.0013346050182008184, "epoch": 4.023330417031205, "grad_norm": 0.14391952753067017, "learning_rate": 1e-06, "loss": 0.0313, "step": 773 }, { "clip_ratio/high_max": 0.0016304237433359958, "clip_ratio/high_mean": 0.0006564975301444065, "clip_ratio/low_mean": 0.0009182446938211797, "clip_ratio/low_min": 9.740125460666604e-05, "clip_ratio/region_mean": 0.0015747422658023424, "epoch": 4.027996500437445, "grad_norm": 0.1473110467195511, "learning_rate": 1e-06, "loss": 0.0725, "step": 774 }, { "clip_ratio/high_max": 0.001807895336241927, "clip_ratio/high_mean": 0.0007525314977101516, "clip_ratio/low_mean": 0.0006893953623148263, "clip_ratio/low_min": 1.4282450138125569e-05, "clip_ratio/region_mean": 0.0014419268190977164, "epoch": 4.0326625838436865, "grad_norm": 0.2618180215358734, "learning_rate": 1e-06, "loss": 0.0087, "step": 775 }, { "clip_ratio/high_max": 0.0019271043056505732, "clip_ratio/high_mean": 0.0007989125369931571, "clip_ratio/low_mean": 0.0007507249192713061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015496374471695162, "epoch": 4.037328667249927, "grad_norm": 0.1659613400697708, "learning_rate": 1e-06, "loss": 0.0537, "step": 776 }, { "clip_ratio/high_max": 0.002457275360939093, "clip_ratio/high_mean": 0.0010113995485880878, "clip_ratio/low_mean": 0.0006561719774254016, "clip_ratio/low_min": 6.624775960517582e-05, "clip_ratio/region_mean": 0.0016675715560268145, "epoch": 4.041994750656168, "grad_norm": 0.1470591127872467, "learning_rate": 1e-06, "loss": -0.0696, "step": 777 }, { "clip_ratio/high_max": 0.0023316239748965017, "clip_ratio/high_mean": 0.0008853714643919375, "clip_ratio/low_mean": 0.0007322361934711807, "clip_ratio/low_min": 1.6684463844285347e-05, "clip_ratio/region_mean": 0.0016176076023839414, "epoch": 4.046660834062409, "grad_norm": 0.1329738199710846, "learning_rate": 1e-06, "loss": -0.0272, "step": 778 }, { "clip_ratio/high_max": 0.0023746980587020516, "clip_ratio/high_mean": 0.000955700097620138, "clip_ratio/low_mean": 0.0007923496677904041, "clip_ratio/low_min": 0.00011225816160731483, "clip_ratio/region_mean": 0.0017480497917858884, "epoch": 4.05132691746865, "grad_norm": 0.1371714174747467, "learning_rate": 1e-06, "loss": -0.0351, "step": 779 }, { "clip_ratio/high_max": 0.002152440862118965, "clip_ratio/high_mean": 0.000914997246582061, "clip_ratio/low_mean": 0.0007267953774316993, "clip_ratio/low_min": 2.956480602733791e-05, "clip_ratio/region_mean": 0.001641792623559013, "epoch": 4.05599300087489, "grad_norm": 0.15127252042293549, "learning_rate": 1e-06, "loss": -0.0008, "step": 780 }, { "clip_ratio/high_max": 0.0026071308384416625, "clip_ratio/high_mean": 0.0010602735819702502, "clip_ratio/low_mean": 0.0008386076415263233, "clip_ratio/low_min": 4.0919271668826696e-05, "clip_ratio/region_mean": 0.0018988811898452695, "epoch": 4.060659084281132, "grad_norm": 0.14045506715774536, "learning_rate": 1e-06, "loss": -0.0268, "step": 781 }, { "clip_ratio/high_max": 0.0023875536026025657, "clip_ratio/high_mean": 0.0009982529518310912, "clip_ratio/low_mean": 0.0007220503011922119, "clip_ratio/low_min": 1.4511260815197602e-05, "clip_ratio/region_mean": 0.0017203032366523985, "epoch": 4.065325167687372, "grad_norm": 0.1363794058561325, "learning_rate": 1e-06, "loss": -0.061, "step": 782 }, { "clip_ratio/high_max": 0.002237800872535445, "clip_ratio/high_mean": 0.0009411189221282257, "clip_ratio/low_mean": 0.0007834278085283586, "clip_ratio/low_min": 1.3058921467745677e-05, "clip_ratio/region_mean": 0.001724546767945867, "epoch": 4.069991251093613, "grad_norm": 0.14006073772907257, "learning_rate": 1e-06, "loss": -0.0118, "step": 783 }, { "clip_ratio/high_max": 0.0023545168733107857, "clip_ratio/high_mean": 0.0009650763731769985, "clip_ratio/low_mean": 0.0008394488713747705, "clip_ratio/low_min": 1.2945319213031325e-05, "clip_ratio/region_mean": 0.0018045252436422743, "epoch": 4.074657334499854, "grad_norm": 0.12399918586015701, "learning_rate": 1e-06, "loss": -0.0149, "step": 784 }, { "clip_ratio/high_max": 0.004768627492012456, "clip_ratio/high_mean": 0.0019168325743521564, "clip_ratio/low_mean": 0.0019984095233667176, "clip_ratio/low_min": 0.00013257576210889965, "clip_ratio/region_mean": 0.003915242094080895, "epoch": 4.079323417906095, "grad_norm": 0.10813162475824356, "learning_rate": 1e-06, "loss": -0.0277, "step": 785 }, { "clip_ratio/high_max": 0.0038928389622014947, "clip_ratio/high_mean": 0.001720463815217954, "clip_ratio/low_mean": 0.0017948549648281187, "clip_ratio/low_min": 9.112090629059821e-05, "clip_ratio/region_mean": 0.0035153187927789986, "epoch": 4.083989501312336, "grad_norm": 0.11050586402416229, "learning_rate": 1e-06, "loss": -0.0053, "step": 786 }, { "clip_ratio/high_max": 0.0042497809627093375, "clip_ratio/high_mean": 0.0018293542743776925, "clip_ratio/low_mean": 0.0015810461409273557, "clip_ratio/low_min": 4.887903924100101e-05, "clip_ratio/region_mean": 0.0034104005026165396, "epoch": 4.088655584718577, "grad_norm": 0.10018651187419891, "learning_rate": 1e-06, "loss": -0.0537, "step": 787 }, { "clip_ratio/high_max": 0.004470041996682994, "clip_ratio/high_mean": 0.0018017599068116397, "clip_ratio/low_mean": 0.0014861097724860883, "clip_ratio/low_min": 6.933658733032644e-05, "clip_ratio/region_mean": 0.003287869672931265, "epoch": 4.093321668124818, "grad_norm": 0.10895606875419617, "learning_rate": 1e-06, "loss": -0.0786, "step": 788 }, { "clip_ratio/high_max": 0.004060956765897572, "clip_ratio/high_mean": 0.0014696106518385932, "clip_ratio/low_mean": 0.0016836730610521045, "clip_ratio/low_min": 0.00017380488225171575, "clip_ratio/region_mean": 0.003153283687424846, "epoch": 4.0979877515310585, "grad_norm": 0.1082402765750885, "learning_rate": 1e-06, "loss": 0.0304, "step": 789 }, { "clip_ratio/high_max": 0.0034149702478316613, "clip_ratio/high_mean": 0.001354354008071823, "clip_ratio/low_mean": 0.002163726749131456, "clip_ratio/low_min": 0.00010136599303223193, "clip_ratio/region_mean": 0.0035180808044970036, "epoch": 4.1026538349373, "grad_norm": 0.11549611389636993, "learning_rate": 1e-06, "loss": 0.0716, "step": 790 }, { "clip_ratio/high_max": 0.003594898749724962, "clip_ratio/high_mean": 0.0014823518176854122, "clip_ratio/low_mean": 0.0019362849016033579, "clip_ratio/low_min": 4.2087540350621566e-05, "clip_ratio/region_mean": 0.0034186366538051516, "epoch": 4.10731991834354, "grad_norm": 0.10483682155609131, "learning_rate": 1e-06, "loss": 0.0078, "step": 791 }, { "clip_ratio/high_max": 0.0035983753041364253, "clip_ratio/high_mean": 0.0015340608879341744, "clip_ratio/low_mean": 0.0019362895254744217, "clip_ratio/low_min": 4.0577830077381805e-05, "clip_ratio/region_mean": 0.003470350435236469, "epoch": 4.111986001749782, "grad_norm": 0.10962541401386261, "learning_rate": 1e-06, "loss": 0.0528, "step": 792 }, { "clip_ratio/high_max": 0.004705524479504675, "clip_ratio/high_mean": 0.0018390097429801244, "clip_ratio/low_mean": 0.0015835553567740135, "clip_ratio/low_min": 0.00020804732594115194, "clip_ratio/region_mean": 0.003422565132495947, "epoch": 4.116652085156022, "grad_norm": 0.11367785930633545, "learning_rate": 1e-06, "loss": -0.0705, "step": 793 }, { "clip_ratio/high_max": 0.004744290665257722, "clip_ratio/high_mean": 0.0018269469437655061, "clip_ratio/low_mean": 0.0016876386980584357, "clip_ratio/low_min": 7.148984877858311e-05, "clip_ratio/region_mean": 0.003514585696393624, "epoch": 4.121318168562263, "grad_norm": 0.11523880809545517, "learning_rate": 1e-06, "loss": -0.0281, "step": 794 }, { "clip_ratio/high_max": 0.004780321134603582, "clip_ratio/high_mean": 0.0017885620854940498, "clip_ratio/low_mean": 0.0017926065120263956, "clip_ratio/low_min": 0.00018352220467932057, "clip_ratio/region_mean": 0.0035811686975648627, "epoch": 4.125984251968504, "grad_norm": 0.10960574448108673, "learning_rate": 1e-06, "loss": -0.0361, "step": 795 }, { "clip_ratio/high_max": 0.004405864514410496, "clip_ratio/high_mean": 0.00181055924622342, "clip_ratio/low_mean": 0.0017280713764193933, "clip_ratio/low_min": 4.628486931324005e-05, "clip_ratio/region_mean": 0.003538630604452919, "epoch": 4.130650335374745, "grad_norm": 0.11767042428255081, "learning_rate": 1e-06, "loss": -0.0018, "step": 796 }, { "clip_ratio/high_max": 0.005056446942035109, "clip_ratio/high_mean": 0.001986493145523127, "clip_ratio/low_mean": 0.0016335325453837868, "clip_ratio/low_min": 0.00010130920418305323, "clip_ratio/region_mean": 0.0036200256072334014, "epoch": 4.135316418780985, "grad_norm": 0.10663873702287674, "learning_rate": 1e-06, "loss": -0.0276, "step": 797 }, { "clip_ratio/high_max": 0.004029067713418044, "clip_ratio/high_mean": 0.0018686227303987835, "clip_ratio/low_mean": 0.0015110203494259622, "clip_ratio/low_min": 8.672525291331112e-05, "clip_ratio/region_mean": 0.003379643130756449, "epoch": 4.139982502187227, "grad_norm": 0.10719020664691925, "learning_rate": 1e-06, "loss": -0.0618, "step": 798 }, { "clip_ratio/high_max": 0.004220316142891534, "clip_ratio/high_mean": 0.0018387219788564835, "clip_ratio/low_mean": 0.0017396884795743972, "clip_ratio/low_min": 2.89821473415941e-05, "clip_ratio/region_mean": 0.0035784105712082237, "epoch": 4.144648585593467, "grad_norm": 0.1055833175778389, "learning_rate": 1e-06, "loss": -0.0127, "step": 799 }, { "clip_ratio/high_max": 0.004139769880566746, "clip_ratio/high_mean": 0.0017569779702171218, "clip_ratio/low_mean": 0.001630351831408916, "clip_ratio/low_min": 6.589509575860575e-05, "clip_ratio/region_mean": 0.0033873297506943345, "epoch": 4.1493146689997085, "grad_norm": 0.09660372138023376, "learning_rate": 1e-06, "loss": -0.0156, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.054966517857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 757.5895385742188, "completions/mean_terminated_length": 563.4156494140625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 4.153980752405949, "grad_norm": 0.1303890198469162, "learning_rate": 1e-06, "loss": 0.0233, "num_tokens": 240510959.0, "reward": 0.6400669813156128, "reward_std": 0.16997414827346802, "rewards/simpleverify_reward/mean": 0.6400669813156128, "rewards/simpleverify_reward/std": 0.4799971878528595, "step": 801 }, { "clip_ratio/high_max": 0.0022182005195645615, "clip_ratio/high_mean": 0.0009035354141815333, "clip_ratio/low_mean": 0.00040883870860852767, "clip_ratio/low_min": 2.0414829123183154e-05, "clip_ratio/region_mean": 0.0013123741155141033, "epoch": 4.15864683581219, "grad_norm": 0.1353575736284256, "learning_rate": 1e-06, "loss": -0.0451, "step": 802 }, { "clip_ratio/high_max": 0.0019282172725070268, "clip_ratio/high_mean": 0.0006982145287111052, "clip_ratio/low_mean": 0.0004980692733624892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011962838179897517, "epoch": 4.163312919218431, "grad_norm": 0.1320056915283203, "learning_rate": 1e-06, "loss": 0.0104, "step": 803 }, { "clip_ratio/high_max": 0.002186690879170783, "clip_ratio/high_mean": 0.0008233120861405041, "clip_ratio/low_mean": 0.000502015504935116, "clip_ratio/low_min": 1.4980824744270649e-05, "clip_ratio/region_mean": 0.0013253275719762314, "epoch": 4.167979002624672, "grad_norm": 0.14056254923343658, "learning_rate": 1e-06, "loss": -0.0136, "step": 804 }, { "clip_ratio/high_max": 0.0018644992742338218, "clip_ratio/high_mean": 0.0007338922723647556, "clip_ratio/low_mean": 0.0006387510229615145, "clip_ratio/low_min": 1.2457643606467173e-05, "clip_ratio/region_mean": 0.0013726432880503125, "epoch": 4.172645086030913, "grad_norm": 0.142314150929451, "learning_rate": 1e-06, "loss": 0.0426, "step": 805 }, { "clip_ratio/high_max": 0.0022365255281329155, "clip_ratio/high_mean": 0.0009228459985024529, "clip_ratio/low_mean": 0.0005956899294687901, "clip_ratio/low_min": 4.7568952140863985e-05, "clip_ratio/region_mean": 0.001518535915238317, "epoch": 4.177311169437154, "grad_norm": 0.15085534751415253, "learning_rate": 1e-06, "loss": -0.0449, "step": 806 }, { "clip_ratio/high_max": 0.002015844114794163, "clip_ratio/high_mean": 0.0007960460789036006, "clip_ratio/low_mean": 0.0007721027604929986, "clip_ratio/low_min": 6.227522681001574e-05, "clip_ratio/region_mean": 0.001568148840306094, "epoch": 4.181977252843395, "grad_norm": 0.1615273654460907, "learning_rate": 1e-06, "loss": -0.0028, "step": 807 }, { "clip_ratio/high_max": 0.002182118456403259, "clip_ratio/high_mean": 0.000867377075337572, "clip_ratio/low_mean": 0.0005352647349354811, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0014026417811692227, "epoch": 4.186643336249635, "grad_norm": 0.14414745569229126, "learning_rate": 1e-06, "loss": -0.0459, "step": 808 }, { "clip_ratio/high_max": 0.002051880284852814, "clip_ratio/high_mean": 0.0009167857915599598, "clip_ratio/low_mean": 0.0007480407002731226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016648264499963261, "epoch": 4.191309419655877, "grad_norm": 0.14732277393341064, "learning_rate": 1e-06, "loss": -0.0004, "step": 809 }, { "clip_ratio/high_max": 0.0022003626872901805, "clip_ratio/high_mean": 0.0008670665047247894, "clip_ratio/low_mean": 0.0006310636917987722, "clip_ratio/low_min": 1.4955730875954032e-05, "clip_ratio/region_mean": 0.0014981302010710351, "epoch": 4.195975503062117, "grad_norm": 0.14946165680885315, "learning_rate": 1e-06, "loss": -0.0403, "step": 810 }, { "clip_ratio/high_max": 0.0025730982815730385, "clip_ratio/high_mean": 0.0010119561811734457, "clip_ratio/low_mean": 0.000793170767792617, "clip_ratio/low_min": 3.861623099510325e-05, "clip_ratio/region_mean": 0.0018051269580610096, "epoch": 4.200641586468358, "grad_norm": 0.20283643901348114, "learning_rate": 1e-06, "loss": -0.0117, "step": 811 }, { "clip_ratio/high_max": 0.0021455318419612013, "clip_ratio/high_mean": 0.0008771239990892354, "clip_ratio/low_mean": 0.0007881807368903537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001665304764173925, "epoch": 4.205307669874599, "grad_norm": 0.13793624937534332, "learning_rate": 1e-06, "loss": -0.049, "step": 812 }, { "clip_ratio/high_max": 0.002361922037380282, "clip_ratio/high_mean": 0.0009592125097697135, "clip_ratio/low_mean": 0.0007195807465905091, "clip_ratio/low_min": 4.42750497313682e-05, "clip_ratio/region_mean": 0.001678793239989318, "epoch": 4.20997375328084, "grad_norm": 0.14665831625461578, "learning_rate": 1e-06, "loss": -0.033, "step": 813 }, { "clip_ratio/high_max": 0.0025589886427042075, "clip_ratio/high_mean": 0.001097928319722996, "clip_ratio/low_mean": 0.0007390546379610896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018369829485891387, "epoch": 4.2146398366870805, "grad_norm": 0.142076775431633, "learning_rate": 1e-06, "loss": -0.0695, "step": 814 }, { "clip_ratio/high_max": 0.0027473546069813892, "clip_ratio/high_mean": 0.0010548358768573962, "clip_ratio/low_mean": 0.0009258007758035092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019806366763077676, "epoch": 4.219305920093322, "grad_norm": 0.14724980294704437, "learning_rate": 1e-06, "loss": -0.0079, "step": 815 }, { "clip_ratio/high_max": 0.0032117781302076764, "clip_ratio/high_mean": 0.0012206568790134043, "clip_ratio/low_mean": 0.0009935115704138298, "clip_ratio/low_min": 3.166160240652971e-05, "clip_ratio/region_mean": 0.0022141684603411704, "epoch": 4.223972003499562, "grad_norm": 0.146607905626297, "learning_rate": 1e-06, "loss": -0.0, "step": 816 }, { "clip_ratio/high_max": 0.004683214487158693, "clip_ratio/high_mean": 0.0016601214156253263, "clip_ratio/low_mean": 0.0015739032824058086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003234024756238796, "epoch": 4.228638086905804, "grad_norm": 0.0962480679154396, "learning_rate": 1e-06, "loss": 0.0226, "step": 817 }, { "clip_ratio/high_max": 0.0051162459421902895, "clip_ratio/high_mean": 0.00207439846417401, "clip_ratio/low_mean": 0.00145659243571572, "clip_ratio/low_min": 7.145190465962514e-05, "clip_ratio/region_mean": 0.003530990972649306, "epoch": 4.233304170312044, "grad_norm": 0.11082332581281662, "learning_rate": 1e-06, "loss": -0.0459, "step": 818 }, { "clip_ratio/high_max": 0.00396244965668302, "clip_ratio/high_mean": 0.0015430440325872041, "clip_ratio/low_mean": 0.001758814545610221, "clip_ratio/low_min": 3.710667078848928e-05, "clip_ratio/region_mean": 0.003301858509075828, "epoch": 4.237970253718285, "grad_norm": 0.13086576759815216, "learning_rate": 1e-06, "loss": 0.0097, "step": 819 }, { "clip_ratio/high_max": 0.004030764815979637, "clip_ratio/high_mean": 0.0017902548061101697, "clip_ratio/low_mean": 0.001622117943043122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034123726582038216, "epoch": 4.242636337124526, "grad_norm": 0.10510283708572388, "learning_rate": 1e-06, "loss": -0.0145, "step": 820 }, { "clip_ratio/high_max": 0.0037007724822615273, "clip_ratio/high_mean": 0.0014824314021097962, "clip_ratio/low_mean": 0.0019096084688499104, "clip_ratio/low_min": 6.586123345186934e-05, "clip_ratio/region_mean": 0.003392039885511622, "epoch": 4.247302420530767, "grad_norm": 0.1070796474814415, "learning_rate": 1e-06, "loss": 0.0417, "step": 821 }, { "clip_ratio/high_max": 0.004464153011213057, "clip_ratio/high_mean": 0.0018111991194018628, "clip_ratio/low_mean": 0.0017414308404113399, "clip_ratio/low_min": 9.955874702427536e-05, "clip_ratio/region_mean": 0.0035526299761841074, "epoch": 4.251968503937007, "grad_norm": 0.10536172986030579, "learning_rate": 1e-06, "loss": -0.0458, "step": 822 }, { "clip_ratio/high_max": 0.004796643399458844, "clip_ratio/high_mean": 0.0020294683636166155, "clip_ratio/low_mean": 0.002219650499682757, "clip_ratio/low_min": 0.00022132278900244273, "clip_ratio/region_mean": 0.004249118763254955, "epoch": 4.256634587343249, "grad_norm": 0.11842436343431473, "learning_rate": 1e-06, "loss": -0.004, "step": 823 }, { "clip_ratio/high_max": 0.004542186914477497, "clip_ratio/high_mean": 0.0018836865274352022, "clip_ratio/low_mean": 0.0015959499269229127, "clip_ratio/low_min": 3.278258736827411e-05, "clip_ratio/region_mean": 0.0034796364489011467, "epoch": 4.26130067074949, "grad_norm": 0.10479302704334259, "learning_rate": 1e-06, "loss": -0.0469, "step": 824 }, { "clip_ratio/high_max": 0.004993004273273982, "clip_ratio/high_mean": 0.0019716628521564417, "clip_ratio/low_mean": 0.0019497534976835595, "clip_ratio/low_min": 5.7192533859051764e-05, "clip_ratio/region_mean": 0.003921416304365266, "epoch": 4.2659667541557305, "grad_norm": 0.11368942260742188, "learning_rate": 1e-06, "loss": -0.0014, "step": 825 }, { "clip_ratio/high_max": 0.00470474392932374, "clip_ratio/high_mean": 0.0018914956162916496, "clip_ratio/low_mean": 0.0015624966254108585, "clip_ratio/low_min": 4.2671825212892145e-05, "clip_ratio/region_mean": 0.003453992190770805, "epoch": 4.270632837561972, "grad_norm": 0.10735386610031128, "learning_rate": 1e-06, "loss": -0.0411, "step": 826 }, { "clip_ratio/high_max": 0.004756406488013454, "clip_ratio/high_mean": 0.001861303004261572, "clip_ratio/low_mean": 0.0018831373272405472, "clip_ratio/low_min": 0.00012971574687981047, "clip_ratio/region_mean": 0.0037444403569679707, "epoch": 4.275298920968212, "grad_norm": 0.10722675174474716, "learning_rate": 1e-06, "loss": -0.0127, "step": 827 }, { "clip_ratio/high_max": 0.0039394246123265475, "clip_ratio/high_mean": 0.0017110314838646445, "clip_ratio/low_mean": 0.0016145914778462611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033256230381084606, "epoch": 4.2799650043744535, "grad_norm": 0.11127717047929764, "learning_rate": 1e-06, "loss": -0.0498, "step": 828 }, { "clip_ratio/high_max": 0.004596872029651422, "clip_ratio/high_mean": 0.0018493219176889397, "clip_ratio/low_mean": 0.0016120491272886284, "clip_ratio/low_min": 0.0001126769020629581, "clip_ratio/region_mean": 0.0034613710013218224, "epoch": 4.284631087780694, "grad_norm": 0.10674639791250229, "learning_rate": 1e-06, "loss": -0.0339, "step": 829 }, { "clip_ratio/high_max": 0.004938447622407693, "clip_ratio/high_mean": 0.0021008079893363174, "clip_ratio/low_mean": 0.001592003183759516, "clip_ratio/low_min": 5.755064557888545e-05, "clip_ratio/region_mean": 0.0036928112531313673, "epoch": 4.289297171186935, "grad_norm": 0.11336338520050049, "learning_rate": 1e-06, "loss": -0.0704, "step": 830 }, { "clip_ratio/high_max": 0.004813632971490733, "clip_ratio/high_mean": 0.001890468458441319, "clip_ratio/low_mean": 0.0019733882982109208, "clip_ratio/low_min": 5.0423557695467025e-05, "clip_ratio/region_mean": 0.0038638567930320278, "epoch": 4.293963254593176, "grad_norm": 0.11263824254274368, "learning_rate": 1e-06, "loss": -0.0089, "step": 831 }, { "clip_ratio/high_max": 0.005107821853016503, "clip_ratio/high_mean": 0.0019648220113595016, "clip_ratio/low_mean": 0.0020307529703131877, "clip_ratio/low_min": 5.191029777051881e-05, "clip_ratio/region_mean": 0.003995574952568859, "epoch": 4.298629337999417, "grad_norm": 0.11687400192022324, "learning_rate": 1e-06, "loss": -0.001, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0552455357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 746.4529418945312, "completions/mean_terminated_length": 550.5845336914062, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 4.303295421405657, "grad_norm": 0.15614362061023712, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 249417172.0, "reward": 0.6404157876968384, "reward_std": 0.16133955121040344, "rewards/simpleverify_reward/mean": 0.6404157280921936, "rewards/simpleverify_reward/std": 0.47989529371261597, "step": 833 }, { "clip_ratio/high_max": 0.0019733297231141478, "clip_ratio/high_mean": 0.0007616561470058514, "clip_ratio/low_mean": 0.0003771799752030347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011388361308490857, "epoch": 4.307961504811899, "grad_norm": 0.11703892052173615, "learning_rate": 1e-06, "loss": -0.0354, "step": 834 }, { "clip_ratio/high_max": 0.0017955149014596827, "clip_ratio/high_mean": 0.00075140794251638, "clip_ratio/low_mean": 0.0005205176976232906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012719256410491653, "epoch": 4.312627588218139, "grad_norm": 0.13232852518558502, "learning_rate": 1e-06, "loss": -0.0301, "step": 835 }, { "clip_ratio/high_max": 0.0022504101289086975, "clip_ratio/high_mean": 0.0007580840783703024, "clip_ratio/low_mean": 0.0006088165519031463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001366900651191827, "epoch": 4.31729367162438, "grad_norm": 0.1509833186864853, "learning_rate": 1e-06, "loss": -0.004, "step": 836 }, { "clip_ratio/high_max": 0.0018517387979954947, "clip_ratio/high_mean": 0.0006814233456680086, "clip_ratio/low_mean": 0.0005646014424200985, "clip_ratio/low_min": 4.8543690354563296e-05, "clip_ratio/region_mean": 0.0012460247962735593, "epoch": 4.321959755030621, "grad_norm": 0.14457760751247406, "learning_rate": 1e-06, "loss": -0.0033, "step": 837 }, { "clip_ratio/high_max": 0.0020943353047186974, "clip_ratio/high_mean": 0.0009089495742955478, "clip_ratio/low_mean": 0.000545835501725378, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014547850587405264, "epoch": 4.326625838436862, "grad_norm": 0.16517773270606995, "learning_rate": 1e-06, "loss": -0.0521, "step": 838 }, { "clip_ratio/high_max": 0.0020902541946270503, "clip_ratio/high_mean": 0.000818183794763172, "clip_ratio/low_mean": 0.0005284679818942095, "clip_ratio/low_min": 1.6451698684250005e-05, "clip_ratio/region_mean": 0.001346651781204855, "epoch": 4.331291921843103, "grad_norm": 0.1452416032552719, "learning_rate": 1e-06, "loss": -0.0028, "step": 839 }, { "clip_ratio/high_max": 0.0022448662639362738, "clip_ratio/high_mean": 0.0009470668483118061, "clip_ratio/low_mean": 0.0007544977806901443, "clip_ratio/low_min": 2.0051331375725567e-05, "clip_ratio/region_mean": 0.0017015646080835722, "epoch": 4.335958005249344, "grad_norm": 0.154227152466774, "learning_rate": 1e-06, "loss": -0.0499, "step": 840 }, { "clip_ratio/high_max": 0.002260458692035172, "clip_ratio/high_mean": 0.0009264874788641464, "clip_ratio/low_mean": 0.0007122709248505998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016387584328185767, "epoch": 4.340624088655585, "grad_norm": 0.15906360745429993, "learning_rate": 1e-06, "loss": -0.0753, "step": 841 }, { "clip_ratio/high_max": 0.001790504771634005, "clip_ratio/high_mean": 0.0007345174362853868, "clip_ratio/low_mean": 0.0008167722917278297, "clip_ratio/low_min": 3.391209975234233e-05, "clip_ratio/region_mean": 0.0015512897371081635, "epoch": 4.3452901720618256, "grad_norm": 0.14767055213451385, "learning_rate": 1e-06, "loss": -0.0089, "step": 842 }, { "clip_ratio/high_max": 0.002371604168729391, "clip_ratio/high_mean": 0.0009641461310820887, "clip_ratio/low_mean": 0.0008687915960763348, "clip_ratio/low_min": 4.038337283418514e-05, "clip_ratio/region_mean": 0.0018329377344343811, "epoch": 4.349956255468067, "grad_norm": 0.14764977991580963, "learning_rate": 1e-06, "loss": -0.0236, "step": 843 }, { "clip_ratio/high_max": 0.002153606779756956, "clip_ratio/high_mean": 0.000962589367190958, "clip_ratio/low_mean": 0.0008469747272101813, "clip_ratio/low_min": 1.8200349586550146e-05, "clip_ratio/region_mean": 0.0018095641207764857, "epoch": 4.354622338874307, "grad_norm": 0.15038618445396423, "learning_rate": 1e-06, "loss": -0.0243, "step": 844 }, { "clip_ratio/high_max": 0.0024876581810531206, "clip_ratio/high_mean": 0.0008999401725304779, "clip_ratio/low_mean": 0.0008811707884888165, "clip_ratio/low_min": 3.200204810127616e-05, "clip_ratio/region_mean": 0.0017811109064496122, "epoch": 4.359288422280549, "grad_norm": 0.14355981349945068, "learning_rate": 1e-06, "loss": -0.0087, "step": 845 }, { "clip_ratio/high_max": 0.0027049496093241032, "clip_ratio/high_mean": 0.0010384314173279563, "clip_ratio/low_mean": 0.0009056093240360497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019440407340880483, "epoch": 4.363954505686789, "grad_norm": 0.13249464333057404, "learning_rate": 1e-06, "loss": -0.0204, "step": 846 }, { "clip_ratio/high_max": 0.0026887355488725007, "clip_ratio/high_mean": 0.0009471602461417206, "clip_ratio/low_mean": 0.0011728872632374987, "clip_ratio/low_min": 1.4820962860540021e-05, "clip_ratio/region_mean": 0.0021200475603109226, "epoch": 4.36862058909303, "grad_norm": 0.16055814921855927, "learning_rate": 1e-06, "loss": 0.0564, "step": 847 }, { "clip_ratio/high_max": 0.002603636574349366, "clip_ratio/high_mean": 0.0009957883503375342, "clip_ratio/low_mean": 0.0009716837648738874, "clip_ratio/low_min": 3.131262565148063e-05, "clip_ratio/region_mean": 0.0019674721334013157, "epoch": 4.373286672499271, "grad_norm": 0.4194774925708771, "learning_rate": 1e-06, "loss": 0.0092, "step": 848 }, { "clip_ratio/high_max": 0.005164644433534704, "clip_ratio/high_mean": 0.0017719701754685957, "clip_ratio/low_mean": 0.0017693263198452769, "clip_ratio/low_min": 0.00012767993757734075, "clip_ratio/region_mean": 0.0035412964716670103, "epoch": 4.377952755905512, "grad_norm": 0.10239679366350174, "learning_rate": 1e-06, "loss": -0.0261, "step": 849 }, { "clip_ratio/high_max": 0.004233103740261868, "clip_ratio/high_mean": 0.00167551855702186, "clip_ratio/low_mean": 0.0012020007525279652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002877519349567592, "epoch": 4.3826188393117524, "grad_norm": 0.09712014347314835, "learning_rate": 1e-06, "loss": -0.036, "step": 850 }, { "clip_ratio/high_max": 0.004521616967394948, "clip_ratio/high_mean": 0.001693811413133517, "clip_ratio/low_mean": 0.0015810784134373534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003274889815656934, "epoch": 4.387284922717994, "grad_norm": 0.10434824973344803, "learning_rate": 1e-06, "loss": -0.0309, "step": 851 }, { "clip_ratio/high_max": 0.00442529335123254, "clip_ratio/high_mean": 0.0015839472398511134, "clip_ratio/low_mean": 0.0017927135959325824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003376660868525505, "epoch": 4.391951006124234, "grad_norm": 0.11312887817621231, "learning_rate": 1e-06, "loss": -0.0048, "step": 852 }, { "clip_ratio/high_max": 0.003923805696103955, "clip_ratio/high_mean": 0.0015902004452073015, "clip_ratio/low_mean": 0.0017551334349263925, "clip_ratio/low_min": 0.0002065648695861455, "clip_ratio/region_mean": 0.003345333934703376, "epoch": 4.3966170895304755, "grad_norm": 0.10837895423173904, "learning_rate": 1e-06, "loss": -0.0042, "step": 853 }, { "clip_ratio/high_max": 0.004731163033284247, "clip_ratio/high_mean": 0.0019692518435476813, "clip_ratio/low_mean": 0.0015142955780902412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034835473634302616, "epoch": 4.401283172936716, "grad_norm": 0.11120729148387909, "learning_rate": 1e-06, "loss": -0.053, "step": 854 }, { "clip_ratio/high_max": 0.004113192786462605, "clip_ratio/high_mean": 0.0017632048693485558, "clip_ratio/low_mean": 0.0017256184910365846, "clip_ratio/low_min": 4.935509423376061e-05, "clip_ratio/region_mean": 0.0034888233203673735, "epoch": 4.405949256342957, "grad_norm": 0.10507329553365707, "learning_rate": 1e-06, "loss": -0.0037, "step": 855 }, { "clip_ratio/high_max": 0.005143274262081832, "clip_ratio/high_mean": 0.0020832320569752483, "clip_ratio/low_mean": 0.0019940908168791793, "clip_ratio/low_min": 3.105590076302178e-05, "clip_ratio/region_mean": 0.004077322853845544, "epoch": 4.410615339749198, "grad_norm": 0.11597028374671936, "learning_rate": 1e-06, "loss": -0.0509, "step": 856 }, { "clip_ratio/high_max": 0.0049623505474301055, "clip_ratio/high_mean": 0.002057961784885265, "clip_ratio/low_mean": 0.0015668369633203838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036247987300157547, "epoch": 4.415281423155439, "grad_norm": 0.10977782309055328, "learning_rate": 1e-06, "loss": -0.0762, "step": 857 }, { "clip_ratio/high_max": 0.004179463256150484, "clip_ratio/high_mean": 0.0016374404294765554, "clip_ratio/low_mean": 0.001921766066516284, "clip_ratio/low_min": 6.827616016380489e-05, "clip_ratio/region_mean": 0.003559206481440924, "epoch": 4.41994750656168, "grad_norm": 0.09874669462442398, "learning_rate": 1e-06, "loss": -0.0097, "step": 858 }, { "clip_ratio/high_max": 0.005240919272182509, "clip_ratio/high_mean": 0.002115168419550173, "clip_ratio/low_mean": 0.001900317489344161, "clip_ratio/low_min": 6.975446740398183e-05, "clip_ratio/region_mean": 0.00401548579975497, "epoch": 4.424613589967921, "grad_norm": 0.12094337493181229, "learning_rate": 1e-06, "loss": -0.0245, "step": 859 }, { "clip_ratio/high_max": 0.004078470228705555, "clip_ratio/high_mean": 0.0019232929043937474, "clip_ratio/low_mean": 0.001969290155102499, "clip_ratio/low_min": 0.00014264184937928803, "clip_ratio/region_mean": 0.0038925828994251788, "epoch": 4.429279673374162, "grad_norm": 0.11632602661848068, "learning_rate": 1e-06, "loss": -0.0252, "step": 860 }, { "clip_ratio/high_max": 0.004177798458840698, "clip_ratio/high_mean": 0.0016859750394360162, "clip_ratio/low_mean": 0.0020808482622669544, "clip_ratio/low_min": 6.092290277592838e-05, "clip_ratio/region_mean": 0.0037668233271688223, "epoch": 4.433945756780402, "grad_norm": 0.10976847261190414, "learning_rate": 1e-06, "loss": -0.0096, "step": 861 }, { "clip_ratio/high_max": 0.004763852419273462, "clip_ratio/high_mean": 0.0018648332043085247, "clip_ratio/low_mean": 0.0018139741932827746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003678807450341992, "epoch": 4.438611840186644, "grad_norm": 0.10736335068941116, "learning_rate": 1e-06, "loss": -0.0211, "step": 862 }, { "clip_ratio/high_max": 0.005099577596411109, "clip_ratio/high_mean": 0.0018054244974337053, "clip_ratio/low_mean": 0.002453087792673614, "clip_ratio/low_min": 5.2112285629846156e-05, "clip_ratio/region_mean": 0.004258512228261679, "epoch": 4.443277923592884, "grad_norm": 0.12012824416160583, "learning_rate": 1e-06, "loss": 0.0553, "step": 863 }, { "clip_ratio/high_max": 0.0041862075158860534, "clip_ratio/high_mean": 0.001806170283089159, "clip_ratio/low_mean": 0.0021111234709678683, "clip_ratio/low_min": 0.00012395874364301562, "clip_ratio/region_mean": 0.003917293754057027, "epoch": 4.447944006999125, "grad_norm": 0.11286681890487671, "learning_rate": 1e-06, "loss": 0.0082, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.059849330357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 760.3589477539062, "completions/mean_terminated_length": 548.0143432617188, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 4.452610090405366, "grad_norm": 0.14467744529247284, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 258211005.0, "reward": 0.6513671875, "reward_std": 0.16271862387657166, "rewards/simpleverify_reward/mean": 0.6513671875, "rewards/simpleverify_reward/std": 0.47655409574508667, "step": 865 }, { "clip_ratio/high_max": 0.002316336896910798, "clip_ratio/high_mean": 0.0009091276860999642, "clip_ratio/low_mean": 0.0005119927582200035, "clip_ratio/low_min": 1.4341440873977263e-05, "clip_ratio/region_mean": 0.001421120443410473, "epoch": 4.457276173811607, "grad_norm": 0.1792125105857849, "learning_rate": 1e-06, "loss": 0.0152, "step": 866 }, { "clip_ratio/high_max": 0.0019441972399363294, "clip_ratio/high_mean": 0.0008390541006519925, "clip_ratio/low_mean": 0.0005526151089725317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013916692259954289, "epoch": 4.4619422572178475, "grad_norm": 0.14807169139385223, "learning_rate": 1e-06, "loss": 0.004, "step": 867 }, { "clip_ratio/high_max": 0.002378000226599397, "clip_ratio/high_mean": 0.0008174100212272606, "clip_ratio/low_mean": 0.0005076649413240375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013250749288999941, "epoch": 4.466608340624089, "grad_norm": 0.14011111855506897, "learning_rate": 1e-06, "loss": -0.0136, "step": 868 }, { "clip_ratio/high_max": 0.0019595753110479563, "clip_ratio/high_mean": 0.0008377066769753583, "clip_ratio/low_mean": 0.0006263971245061839, "clip_ratio/low_min": 2.6795283702085726e-05, "clip_ratio/region_mean": 0.001464103796024574, "epoch": 4.471274424030329, "grad_norm": 0.16807053983211517, "learning_rate": 1e-06, "loss": -0.0146, "step": 869 }, { "clip_ratio/high_max": 0.0023144043152569793, "clip_ratio/high_mean": 0.0009316980140283704, "clip_ratio/low_mean": 0.00048414221146231284, "clip_ratio/low_min": 2.0038474758621305e-05, "clip_ratio/region_mean": 0.001415840244590072, "epoch": 4.475940507436571, "grad_norm": 0.1402735710144043, "learning_rate": 1e-06, "loss": -0.043, "step": 870 }, { "clip_ratio/high_max": 0.0023137359239626676, "clip_ratio/high_mean": 0.0008994987783808028, "clip_ratio/low_mean": 0.000516015511493606, "clip_ratio/low_min": 2.61451586993644e-05, "clip_ratio/region_mean": 0.0014155143035168294, "epoch": 4.480606590842811, "grad_norm": 0.1389661282300949, "learning_rate": 1e-06, "loss": -0.0388, "step": 871 }, { "clip_ratio/high_max": 0.0020878209252259694, "clip_ratio/high_mean": 0.0008631467462691944, "clip_ratio/low_mean": 0.000618815704001463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014819624411757104, "epoch": 4.485272674249052, "grad_norm": 0.1456226110458374, "learning_rate": 1e-06, "loss": -0.0067, "step": 872 }, { "clip_ratio/high_max": 0.0027428164539742284, "clip_ratio/high_mean": 0.0010457027237862349, "clip_ratio/low_mean": 0.0006245064837457903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016702092325431295, "epoch": 4.489938757655293, "grad_norm": 0.14806191623210907, "learning_rate": 1e-06, "loss": -0.0566, "step": 873 }, { "clip_ratio/high_max": 0.0025542981675243936, "clip_ratio/high_mean": 0.0009610265551600605, "clip_ratio/low_mean": 0.0008237967913373723, "clip_ratio/low_min": 3.612716682255268e-05, "clip_ratio/region_mean": 0.001784823376510758, "epoch": 4.494604841061534, "grad_norm": 0.13969510793685913, "learning_rate": 1e-06, "loss": -0.0007, "step": 874 }, { "clip_ratio/high_max": 0.002498247482435545, "clip_ratio/high_mean": 0.0008596607276558643, "clip_ratio/low_mean": 0.0007649064564247965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016245672304648906, "epoch": 4.499270924467774, "grad_norm": 0.14741335809230804, "learning_rate": 1e-06, "loss": -0.008, "step": 875 }, { "clip_ratio/high_max": 0.002230259844509419, "clip_ratio/high_mean": 0.0009351415774290217, "clip_ratio/low_mean": 0.0006591346937057097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015942762547638267, "epoch": 4.503937007874016, "grad_norm": 0.14288948476314545, "learning_rate": 1e-06, "loss": -0.0744, "step": 876 }, { "clip_ratio/high_max": 0.002228765268228017, "clip_ratio/high_mean": 0.0009118919861066388, "clip_ratio/low_mean": 0.0008585191244492307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001770411130564753, "epoch": 4.508603091280257, "grad_norm": 0.13459570705890656, "learning_rate": 1e-06, "loss": -0.0504, "step": 877 }, { "clip_ratio/high_max": 0.0025594074395485222, "clip_ratio/high_mean": 0.0010769996661110781, "clip_ratio/low_mean": 0.0007173201038312982, "clip_ratio/low_min": 4.5391858293442056e-05, "clip_ratio/region_mean": 0.0017943197817658074, "epoch": 4.5132691746864975, "grad_norm": 0.14680016040802002, "learning_rate": 1e-06, "loss": -0.0328, "step": 878 }, { "clip_ratio/high_max": 0.002306700167537201, "clip_ratio/high_mean": 0.0008955223729572026, "clip_ratio/low_mean": 0.0007326979175559245, "clip_ratio/low_min": 5.7451481552561745e-05, "clip_ratio/region_mean": 0.0016282202959700953, "epoch": 4.517935258092739, "grad_norm": 0.13933761417865753, "learning_rate": 1e-06, "loss": -0.0047, "step": 879 }, { "clip_ratio/high_max": 0.0028688012826023623, "clip_ratio/high_mean": 0.0011098889663117006, "clip_ratio/low_mean": 0.000850883605380659, "clip_ratio/low_min": 3.868592921207892e-05, "clip_ratio/region_mean": 0.001960772558959434, "epoch": 4.522601341498979, "grad_norm": 0.15788087248802185, "learning_rate": 1e-06, "loss": -0.0223, "step": 880 }, { "clip_ratio/high_max": 0.00535259504977148, "clip_ratio/high_mean": 0.0019520611167536117, "clip_ratio/low_mean": 0.001639450252696406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035915113548981026, "epoch": 4.5272674249052205, "grad_norm": 0.09956730902194977, "learning_rate": 1e-06, "loss": -0.0097, "step": 881 }, { "clip_ratio/high_max": 0.004989013526937924, "clip_ratio/high_mean": 0.0018150042742490768, "clip_ratio/low_mean": 0.0019589620096667204, "clip_ratio/low_min": 5.9637404774548486e-05, "clip_ratio/region_mean": 0.0037739662366220728, "epoch": 4.531933508311461, "grad_norm": 0.11524045467376709, "learning_rate": 1e-06, "loss": 0.0141, "step": 882 }, { "clip_ratio/high_max": 0.003978137872763909, "clip_ratio/high_mean": 0.0017849262621894013, "clip_ratio/low_mean": 0.0019400392247916898, "clip_ratio/low_min": 0.00016200301979552023, "clip_ratio/region_mean": 0.00372496536874678, "epoch": 4.536599591717702, "grad_norm": 0.10538440197706223, "learning_rate": 1e-06, "loss": 0.0031, "step": 883 }, { "clip_ratio/high_max": 0.0054128130213939585, "clip_ratio/high_mean": 0.0017640075366216479, "clip_ratio/low_mean": 0.001633014808248845, "clip_ratio/low_min": 4.260395508026704e-05, "clip_ratio/region_mean": 0.0033970223848882597, "epoch": 4.541265675123943, "grad_norm": 0.1047298014163971, "learning_rate": 1e-06, "loss": -0.0144, "step": 884 }, { "clip_ratio/high_max": 0.004757896866067313, "clip_ratio/high_mean": 0.0019963004961027764, "clip_ratio/low_mean": 0.001823432648961898, "clip_ratio/low_min": 9.751690413395409e-05, "clip_ratio/region_mean": 0.003819733072305098, "epoch": 4.545931758530184, "grad_norm": 0.11066095530986786, "learning_rate": 1e-06, "loss": -0.0155, "step": 885 }, { "clip_ratio/high_max": 0.004701394456787966, "clip_ratio/high_mean": 0.001973842332517961, "clip_ratio/low_mean": 0.0014590519895136822, "clip_ratio/low_min": 8.305855590151623e-05, "clip_ratio/region_mean": 0.003432894285651855, "epoch": 4.550597841936424, "grad_norm": 0.10153724998235703, "learning_rate": 1e-06, "loss": -0.0438, "step": 886 }, { "clip_ratio/high_max": 0.00507149487384595, "clip_ratio/high_mean": 0.0019511599730321905, "clip_ratio/low_mean": 0.0015741557072033174, "clip_ratio/low_min": 3.0361914468812756e-05, "clip_ratio/region_mean": 0.0035253157111583278, "epoch": 4.555263925342666, "grad_norm": 0.10322874039411545, "learning_rate": 1e-06, "loss": -0.0397, "step": 887 }, { "clip_ratio/high_max": 0.004596612459863536, "clip_ratio/high_mean": 0.0017719544666761067, "clip_ratio/low_mean": 0.0016361703092115931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034081247285939753, "epoch": 4.559930008748906, "grad_norm": 0.10048892349004745, "learning_rate": 1e-06, "loss": -0.0076, "step": 888 }, { "clip_ratio/high_max": 0.005299713113345206, "clip_ratio/high_mean": 0.002177820435463218, "clip_ratio/low_mean": 0.00166122873997665, "clip_ratio/low_min": 3.2075957278721035e-05, "clip_ratio/region_mean": 0.0038390491536119953, "epoch": 4.564596092155147, "grad_norm": 0.11002933233976364, "learning_rate": 1e-06, "loss": -0.0576, "step": 889 }, { "clip_ratio/high_max": 0.005094123160233721, "clip_ratio/high_mean": 0.0020380398200359195, "clip_ratio/low_mean": 0.0019353531279193703, "clip_ratio/low_min": 8.312551653943956e-05, "clip_ratio/region_mean": 0.003973393046180718, "epoch": 4.569262175561388, "grad_norm": 0.11039287596940994, "learning_rate": 1e-06, "loss": -0.0016, "step": 890 }, { "clip_ratio/high_max": 0.004597242412273772, "clip_ratio/high_mean": 0.0017055854477803223, "clip_ratio/low_mean": 0.0019164369950885884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003622022377385292, "epoch": 4.573928258967629, "grad_norm": 0.11102067679166794, "learning_rate": 1e-06, "loss": -0.0091, "step": 891 }, { "clip_ratio/high_max": 0.005018611598643474, "clip_ratio/high_mean": 0.0019941198697779328, "clip_ratio/low_mean": 0.0016786272935860325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036727471742779016, "epoch": 4.57859434237387, "grad_norm": 0.12340915203094482, "learning_rate": 1e-06, "loss": -0.0753, "step": 892 }, { "clip_ratio/high_max": 0.004183171215117909, "clip_ratio/high_mean": 0.0018596687259559985, "clip_ratio/low_mean": 0.0019171984677086584, "clip_ratio/low_min": 6.156119343359023e-05, "clip_ratio/region_mean": 0.003776867190026678, "epoch": 4.583260425780111, "grad_norm": 0.10531151294708252, "learning_rate": 1e-06, "loss": -0.0512, "step": 893 }, { "clip_ratio/high_max": 0.005291488443617709, "clip_ratio/high_mean": 0.0020982137757528108, "clip_ratio/low_mean": 0.0016920844827836845, "clip_ratio/low_min": 0.00013617557851830497, "clip_ratio/region_mean": 0.003790298127569258, "epoch": 4.587926509186351, "grad_norm": 0.11162614077329636, "learning_rate": 1e-06, "loss": -0.0336, "step": 894 }, { "clip_ratio/high_max": 0.004409324668813497, "clip_ratio/high_mean": 0.0018550814274931327, "clip_ratio/low_mean": 0.0019025323053938337, "clip_ratio/low_min": 0.00019946683823945932, "clip_ratio/region_mean": 0.0037576137256110087, "epoch": 4.592592592592593, "grad_norm": 0.10397019237279892, "learning_rate": 1e-06, "loss": -0.0056, "step": 895 }, { "clip_ratio/high_max": 0.005536343996936921, "clip_ratio/high_mean": 0.0021662823201040737, "clip_ratio/low_mean": 0.0020387462864164263, "clip_ratio/low_min": 0.000132779701743857, "clip_ratio/region_mean": 0.004205028613796458, "epoch": 4.597258675998834, "grad_norm": 0.12072121351957321, "learning_rate": 1e-06, "loss": -0.0234, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0640345982142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 779.6520385742188, "completions/mean_terminated_length": 552.7621459960938, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 4.601924759405074, "grad_norm": 0.1514691263437271, "learning_rate": 1e-06, "loss": -0.0414, "num_tokens": 267081832.0, "reward": 0.6273019313812256, "reward_std": 0.17339251935482025, "rewards/simpleverify_reward/mean": 0.6273018717765808, "rewards/simpleverify_reward/std": 0.4835395812988281, "step": 897 }, { "clip_ratio/high_max": 0.0019966550171375275, "clip_ratio/high_mean": 0.0008502315777150216, "clip_ratio/low_mean": 0.0005893587976970593, "clip_ratio/low_min": 1.0537852176639717e-05, "clip_ratio/region_mean": 0.001439590396330459, "epoch": 4.606590842811316, "grad_norm": 0.14296402037143707, "learning_rate": 1e-06, "loss": -0.0248, "step": 898 }, { "clip_ratio/high_max": 0.002014832331042271, "clip_ratio/high_mean": 0.0008722093480173498, "clip_ratio/low_mean": 0.0005629072593364981, "clip_ratio/low_min": 2.0458264771150425e-05, "clip_ratio/region_mean": 0.001435116610082332, "epoch": 4.611256926217556, "grad_norm": 0.14364726841449738, "learning_rate": 1e-06, "loss": -0.0404, "step": 899 }, { "clip_ratio/high_max": 0.001931311859152629, "clip_ratio/high_mean": 0.0007064443070703419, "clip_ratio/low_mean": 0.0005196976344450377, "clip_ratio/low_min": 1.7857142665889114e-05, "clip_ratio/region_mean": 0.001226141952429316, "epoch": 4.615923009623797, "grad_norm": 0.15297484397888184, "learning_rate": 1e-06, "loss": 0.0051, "step": 900 }, { "clip_ratio/high_max": 0.0021477193004102446, "clip_ratio/high_mean": 0.0008941083542595152, "clip_ratio/low_mean": 0.0006637904762101243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001557898816827219, "epoch": 4.620589093030038, "grad_norm": 0.16430549323558807, "learning_rate": 1e-06, "loss": -0.0043, "step": 901 }, { "clip_ratio/high_max": 0.0021034933233750053, "clip_ratio/high_mean": 0.0008832715120661305, "clip_ratio/low_mean": 0.0005339268254829221, "clip_ratio/low_min": 1.535249248263426e-05, "clip_ratio/region_mean": 0.001417198305716738, "epoch": 4.625255176436279, "grad_norm": 0.12906159460544586, "learning_rate": 1e-06, "loss": -0.0428, "step": 902 }, { "clip_ratio/high_max": 0.0017007193819154054, "clip_ratio/high_mean": 0.000825791841634782, "clip_ratio/low_mean": 0.0006772770047973609, "clip_ratio/low_min": 2.825090632541105e-05, "clip_ratio/region_mean": 0.0015030688191473018, "epoch": 4.6299212598425195, "grad_norm": 0.13809776306152344, "learning_rate": 1e-06, "loss": -0.0136, "step": 903 }, { "clip_ratio/high_max": 0.0018239171695313416, "clip_ratio/high_mean": 0.0008989349298644811, "clip_ratio/low_mean": 0.0007857135205995291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016846484140842222, "epoch": 4.634587343248761, "grad_norm": 0.1569642871618271, "learning_rate": 1e-06, "loss": -0.0476, "step": 904 }, { "clip_ratio/high_max": 0.0022080958005972207, "clip_ratio/high_mean": 0.0009907887379085878, "clip_ratio/low_mean": 0.0007729441331321141, "clip_ratio/low_min": 2.535790736146737e-05, "clip_ratio/region_mean": 0.0017637328746786807, "epoch": 4.639253426655001, "grad_norm": 0.13189558684825897, "learning_rate": 1e-06, "loss": -0.0164, "step": 905 }, { "clip_ratio/high_max": 0.002198292910179589, "clip_ratio/high_mean": 0.0009344591671833768, "clip_ratio/low_mean": 0.0008431569713138742, "clip_ratio/low_min": 1.546264138596598e-05, "clip_ratio/region_mean": 0.00177761611121241, "epoch": 4.6439195100612425, "grad_norm": 1.2006735801696777, "learning_rate": 1e-06, "loss": -0.0119, "step": 906 }, { "clip_ratio/high_max": 0.0023442394885933027, "clip_ratio/high_mean": 0.0008907200317480601, "clip_ratio/low_mean": 0.0008983572843135335, "clip_ratio/low_min": 1.826417246775236e-05, "clip_ratio/region_mean": 0.001789077268767869, "epoch": 4.648585593467483, "grad_norm": 0.17841781675815582, "learning_rate": 1e-06, "loss": -0.0247, "step": 907 }, { "clip_ratio/high_max": 0.002398787924903445, "clip_ratio/high_mean": 0.0009394316330144648, "clip_ratio/low_mean": 0.0009981245384551585, "clip_ratio/low_min": 8.655553392600268e-05, "clip_ratio/region_mean": 0.00193755621148739, "epoch": 4.653251676873724, "grad_norm": 0.13568075001239777, "learning_rate": 1e-06, "loss": -0.0113, "step": 908 }, { "clip_ratio/high_max": 0.0024681854047230445, "clip_ratio/high_mean": 0.0011434089319664054, "clip_ratio/low_mean": 0.0009468560929235537, "clip_ratio/low_min": 4.797081237484235e-05, "clip_ratio/region_mean": 0.002090265064907726, "epoch": 4.657917760279965, "grad_norm": 0.15703392028808594, "learning_rate": 1e-06, "loss": -0.06, "step": 909 }, { "clip_ratio/high_max": 0.002165435485949274, "clip_ratio/high_mean": 0.0008746803232497768, "clip_ratio/low_mean": 0.0009741417288751109, "clip_ratio/low_min": 5.2604104894271586e-05, "clip_ratio/region_mean": 0.001848822066676803, "epoch": 4.662583843686206, "grad_norm": 0.14709343016147614, "learning_rate": 1e-06, "loss": 0.0095, "step": 910 }, { "clip_ratio/high_max": 0.002896125013648998, "clip_ratio/high_mean": 0.001228104272740893, "clip_ratio/low_mean": 0.0009200112190228538, "clip_ratio/low_min": 4.656675082514994e-05, "clip_ratio/region_mean": 0.00214811552723404, "epoch": 4.667249927092447, "grad_norm": 0.1403370052576065, "learning_rate": 1e-06, "loss": -0.0347, "step": 911 }, { "clip_ratio/high_max": 0.0025390020018676296, "clip_ratio/high_mean": 0.0012303942494327202, "clip_ratio/low_mean": 0.0011628588417806895, "clip_ratio/low_min": 5.816253542434424e-05, "clip_ratio/region_mean": 0.0023932530712045263, "epoch": 4.671916010498688, "grad_norm": 0.22160589694976807, "learning_rate": 1e-06, "loss": -0.0357, "step": 912 }, { "clip_ratio/high_max": 0.004892050696071237, "clip_ratio/high_mean": 0.001986209419555962, "clip_ratio/low_mean": 0.0016969894204521552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036831988181802444, "epoch": 4.676582093904928, "grad_norm": 0.104917012155056, "learning_rate": 1e-06, "loss": -0.0422, "step": 913 }, { "clip_ratio/high_max": 0.004811933271412272, "clip_ratio/high_mean": 0.001927438690472627, "clip_ratio/low_mean": 0.002063471223664237, "clip_ratio/low_min": 0.00014265083882492036, "clip_ratio/region_mean": 0.003990909899584949, "epoch": 4.681248177311169, "grad_norm": 0.10945775359869003, "learning_rate": 1e-06, "loss": -0.0256, "step": 914 }, { "clip_ratio/high_max": 0.0042466986560611986, "clip_ratio/high_mean": 0.0018081294510920998, "clip_ratio/low_mean": 0.0019243499027652433, "clip_ratio/low_min": 0.0002875029167626053, "clip_ratio/region_mean": 0.003732479381142184, "epoch": 4.685914260717411, "grad_norm": 0.11086112260818481, "learning_rate": 1e-06, "loss": -0.0412, "step": 915 }, { "clip_ratio/high_max": 0.0041995878927991726, "clip_ratio/high_mean": 0.0017002133972709998, "clip_ratio/low_mean": 0.002053764088486787, "clip_ratio/low_min": 3.422782174311578e-05, "clip_ratio/region_mean": 0.003753977500309702, "epoch": 4.690580344123651, "grad_norm": 0.10955814272165298, "learning_rate": 1e-06, "loss": 0.0042, "step": 916 }, { "clip_ratio/high_max": 0.005060057876107749, "clip_ratio/high_mean": 0.0021903947490500286, "clip_ratio/low_mean": 0.0021353217744035646, "clip_ratio/low_min": 4.075644028489478e-05, "clip_ratio/region_mean": 0.004325716552557424, "epoch": 4.695246427529892, "grad_norm": 0.13480030000209808, "learning_rate": 1e-06, "loss": -0.0054, "step": 917 }, { "clip_ratio/high_max": 0.004877847895841114, "clip_ratio/high_mean": 0.002042485401034355, "clip_ratio/low_mean": 0.0016304570890497416, "clip_ratio/low_min": 3.070498496526852e-05, "clip_ratio/region_mean": 0.0036729424609802663, "epoch": 4.699912510936133, "grad_norm": 0.10425883531570435, "learning_rate": 1e-06, "loss": -0.0435, "step": 918 }, { "clip_ratio/high_max": 0.003946854681998957, "clip_ratio/high_mean": 0.0018309454935661051, "clip_ratio/low_mean": 0.0018375474792264868, "clip_ratio/low_min": 0.0002036843252426479, "clip_ratio/region_mean": 0.0036684929218608886, "epoch": 4.704578594342374, "grad_norm": 0.1084047481417656, "learning_rate": 1e-06, "loss": -0.0144, "step": 919 }, { "clip_ratio/high_max": 0.004446893581189215, "clip_ratio/high_mean": 0.0019434659261605702, "clip_ratio/low_mean": 0.002068542980850907, "clip_ratio/low_min": 4.321272353990935e-05, "clip_ratio/region_mean": 0.004012008881545626, "epoch": 4.7092446777486145, "grad_norm": 0.1510782539844513, "learning_rate": 1e-06, "loss": -0.0485, "step": 920 }, { "clip_ratio/high_max": 0.004283720336388797, "clip_ratio/high_mean": 0.0018741002641036175, "clip_ratio/low_mean": 0.0018227697546535637, "clip_ratio/low_min": 7.896795341366669e-05, "clip_ratio/region_mean": 0.0036968700151192024, "epoch": 4.713910761154856, "grad_norm": 0.10664052516222, "learning_rate": 1e-06, "loss": -0.0171, "step": 921 }, { "clip_ratio/high_max": 0.004387929628137499, "clip_ratio/high_mean": 0.001822286933020223, "clip_ratio/low_mean": 0.002020908665144816, "clip_ratio/low_min": 0.00010714295603975188, "clip_ratio/region_mean": 0.0038431956636486575, "epoch": 4.718576844561096, "grad_norm": 0.1320187747478485, "learning_rate": 1e-06, "loss": -0.0135, "step": 922 }, { "clip_ratio/high_max": 0.004836086663999595, "clip_ratio/high_mean": 0.0018862230645027012, "clip_ratio/low_mean": 0.0020955869695171714, "clip_ratio/low_min": 0.00019287233226350509, "clip_ratio/region_mean": 0.003981809961260296, "epoch": 4.723242927967338, "grad_norm": 0.1082654595375061, "learning_rate": 1e-06, "loss": -0.0256, "step": 923 }, { "clip_ratio/high_max": 0.004527326280367561, "clip_ratio/high_mean": 0.0018808628992701415, "clip_ratio/low_mean": 0.0021697771371691488, "clip_ratio/low_min": 0.0001757780737534631, "clip_ratio/region_mean": 0.0040506400691810995, "epoch": 4.727909011373578, "grad_norm": 0.10881368815898895, "learning_rate": 1e-06, "loss": -0.0121, "step": 924 }, { "clip_ratio/high_max": 0.00433174853969831, "clip_ratio/high_mean": 0.0021152673398319166, "clip_ratio/low_mean": 0.001989982614759356, "clip_ratio/low_min": 7.648932842130307e-05, "clip_ratio/region_mean": 0.00410524990002159, "epoch": 4.732575094779819, "grad_norm": 0.115203358232975, "learning_rate": 1e-06, "loss": -0.0609, "step": 925 }, { "clip_ratio/high_max": 0.004231540784530807, "clip_ratio/high_mean": 0.0017437462593079545, "clip_ratio/low_mean": 0.002007946306548547, "clip_ratio/low_min": 0.0001181820043711923, "clip_ratio/region_mean": 0.0037516925513045862, "epoch": 4.73724117818606, "grad_norm": 0.11197567731142044, "learning_rate": 1e-06, "loss": 0.0085, "step": 926 }, { "clip_ratio/high_max": 0.005584392165474128, "clip_ratio/high_mean": 0.0022223177875275724, "clip_ratio/low_mean": 0.0017691505308903288, "clip_ratio/low_min": 6.256367851165123e-05, "clip_ratio/region_mean": 0.003991468358435668, "epoch": 4.741907261592301, "grad_norm": 0.12189286947250366, "learning_rate": 1e-06, "loss": -0.0355, "step": 927 }, { "clip_ratio/high_max": 0.0050614687643246725, "clip_ratio/high_mean": 0.0023151986715674866, "clip_ratio/low_mean": 0.0022876011753396597, "clip_ratio/low_min": 0.00024112741084536538, "clip_ratio/region_mean": 0.004602799934218638, "epoch": 4.746573344998541, "grad_norm": 0.1318483203649521, "learning_rate": 1e-06, "loss": -0.0371, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0656389508928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 783.4395141601562, "completions/mean_terminated_length": 550.7317504882812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 4.751239428404783, "grad_norm": 0.159715935587883, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 275896892.0, "reward": 0.638253390789032, "reward_std": 0.15481005609035492, "rewards/simpleverify_reward/mean": 0.6382533311843872, "rewards/simpleverify_reward/std": 0.4805227816104889, "step": 929 }, { "clip_ratio/high_max": 0.0020554618749883957, "clip_ratio/high_mean": 0.0008090114497463219, "clip_ratio/low_mean": 0.0005496207622854854, "clip_ratio/low_min": 2.48065098276129e-05, "clip_ratio/region_mean": 0.0013586322456831113, "epoch": 4.755905511811024, "grad_norm": 0.1484919786453247, "learning_rate": 1e-06, "loss": -0.0431, "step": 930 }, { "clip_ratio/high_max": 0.0018220637721242383, "clip_ratio/high_mean": 0.0006127040596766165, "clip_ratio/low_mean": 0.00044348830215312773, "clip_ratio/low_min": 1.35398613565485e-05, "clip_ratio/region_mean": 0.0010561923627392389, "epoch": 4.7605715952172645, "grad_norm": 0.14512133598327637, "learning_rate": 1e-06, "loss": 0.0141, "step": 931 }, { "clip_ratio/high_max": 0.0020306450605858117, "clip_ratio/high_mean": 0.0008300516037706984, "clip_ratio/low_mean": 0.0006354021197694237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014654537335445639, "epoch": 4.765237678623506, "grad_norm": 0.1508951038122177, "learning_rate": 1e-06, "loss": -0.0547, "step": 932 }, { "clip_ratio/high_max": 0.002150512529624393, "clip_ratio/high_mean": 0.000912185809283983, "clip_ratio/low_mean": 0.0005712286874768324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014834145003987942, "epoch": 4.769903762029746, "grad_norm": 0.13094373047351837, "learning_rate": 1e-06, "loss": -0.0429, "step": 933 }, { "clip_ratio/high_max": 0.0020429766009328887, "clip_ratio/high_mean": 0.0007692655763094081, "clip_ratio/low_mean": 0.0006541620823554695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001423427642293973, "epoch": 4.7745698454359875, "grad_norm": 0.13760513067245483, "learning_rate": 1e-06, "loss": -0.032, "step": 934 }, { "clip_ratio/high_max": 0.0021114697156008333, "clip_ratio/high_mean": 0.0007809801372786751, "clip_ratio/low_mean": 0.00069660631561419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014775864328839816, "epoch": 4.779235928842228, "grad_norm": 0.15440140664577484, "learning_rate": 1e-06, "loss": 0.0005, "step": 935 }, { "clip_ratio/high_max": 0.002296822720381897, "clip_ratio/high_mean": 0.0008933182980399579, "clip_ratio/low_mean": 0.0006223407153811422, "clip_ratio/low_min": 1.8768769223242998e-05, "clip_ratio/region_mean": 0.0015156590270635206, "epoch": 4.783902012248469, "grad_norm": 0.1917096972465515, "learning_rate": 1e-06, "loss": -0.019, "step": 936 }, { "clip_ratio/high_max": 0.001814560037018964, "clip_ratio/high_mean": 0.0008024966682569357, "clip_ratio/low_mean": 0.0008624395850347355, "clip_ratio/low_min": 4.576175888360012e-05, "clip_ratio/region_mean": 0.0016649362369207665, "epoch": 4.78856809565471, "grad_norm": 0.4116879403591156, "learning_rate": 1e-06, "loss": 0.0076, "step": 937 }, { "clip_ratio/high_max": 0.002389597320870962, "clip_ratio/high_mean": 0.000893748148882878, "clip_ratio/low_mean": 0.0008153154285537312, "clip_ratio/low_min": 7.735586041235365e-05, "clip_ratio/region_mean": 0.0017090635774366092, "epoch": 4.793234179060951, "grad_norm": 0.16660092771053314, "learning_rate": 1e-06, "loss": 0.0136, "step": 938 }, { "clip_ratio/high_max": 0.002578572675702162, "clip_ratio/high_mean": 0.0010321015943191014, "clip_ratio/low_mean": 0.0007902187990111997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001822320387873333, "epoch": 4.797900262467191, "grad_norm": 0.13554871082305908, "learning_rate": 1e-06, "loss": -0.0276, "step": 939 }, { "clip_ratio/high_max": 0.0020533044953481294, "clip_ratio/high_mean": 0.0007841348906367784, "clip_ratio/low_mean": 0.0006604758827961632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014446107634285, "epoch": 4.802566345873433, "grad_norm": 0.13301295042037964, "learning_rate": 1e-06, "loss": -0.0157, "step": 940 }, { "clip_ratio/high_max": 0.002186465288104955, "clip_ratio/high_mean": 0.000920623836464074, "clip_ratio/low_mean": 0.0007418143559334567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001662438200582983, "epoch": 4.807232429279673, "grad_norm": 0.12652911245822906, "learning_rate": 1e-06, "loss": -0.0029, "step": 941 }, { "clip_ratio/high_max": 0.002254081457067514, "clip_ratio/high_mean": 0.0009264382424589712, "clip_ratio/low_mean": 0.0008024652561289258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001728903480398003, "epoch": 4.811898512685914, "grad_norm": 0.13703419268131256, "learning_rate": 1e-06, "loss": -0.0249, "step": 942 }, { "clip_ratio/high_max": 0.002292283003043849, "clip_ratio/high_mean": 0.0009604744955140632, "clip_ratio/low_mean": 0.0008194325037038652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017799070119508542, "epoch": 4.816564596092155, "grad_norm": 0.14402545988559723, "learning_rate": 1e-06, "loss": -0.0348, "step": 943 }, { "clip_ratio/high_max": 0.0023272093385457993, "clip_ratio/high_mean": 0.0009783410023374017, "clip_ratio/low_mean": 0.0007571067553726607, "clip_ratio/low_min": 1.5006002286099829e-05, "clip_ratio/region_mean": 0.0017354477822664194, "epoch": 4.821230679498396, "grad_norm": 0.14965122938156128, "learning_rate": 1e-06, "loss": 0.0011, "step": 944 }, { "clip_ratio/high_max": 0.0049721550167305395, "clip_ratio/high_mean": 0.0020464786102820653, "clip_ratio/low_mean": 0.0018046193799818866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003851098066661507, "epoch": 4.8258967629046365, "grad_norm": 0.10149649530649185, "learning_rate": 1e-06, "loss": -0.0391, "step": 945 }, { "clip_ratio/high_max": 0.005088968478958122, "clip_ratio/high_mean": 0.0019277975079603493, "clip_ratio/low_mean": 0.0016199627498281188, "clip_ratio/low_min": 6.703131657559425e-05, "clip_ratio/region_mean": 0.0035477601923048496, "epoch": 4.830562846310878, "grad_norm": 0.10678434371948242, "learning_rate": 1e-06, "loss": -0.044, "step": 946 }, { "clip_ratio/high_max": 0.00415263109607622, "clip_ratio/high_mean": 0.0015010385832283646, "clip_ratio/low_mean": 0.0016842340974108083, "clip_ratio/low_min": 5.060728653916158e-05, "clip_ratio/region_mean": 0.00318527268973412, "epoch": 4.835228929717118, "grad_norm": 0.10137312859296799, "learning_rate": 1e-06, "loss": 0.0132, "step": 947 }, { "clip_ratio/high_max": 0.004087245135451667, "clip_ratio/high_mean": 0.0017850204712885898, "clip_ratio/low_mean": 0.0016947056246863212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034797260814229958, "epoch": 4.83989501312336, "grad_norm": 0.11224978417158127, "learning_rate": 1e-06, "loss": -0.0556, "step": 948 }, { "clip_ratio/high_max": 0.004553834689431824, "clip_ratio/high_mean": 0.001844798811362125, "clip_ratio/low_mean": 0.0015333011797338258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033781000674935058, "epoch": 4.844561096529601, "grad_norm": 0.09770405292510986, "learning_rate": 1e-06, "loss": -0.0436, "step": 949 }, { "clip_ratio/high_max": 0.004226598524837755, "clip_ratio/high_mean": 0.0016337919259967748, "clip_ratio/low_mean": 0.001581754527251178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032155465087271295, "epoch": 4.849227179935841, "grad_norm": 18.484195709228516, "learning_rate": 1e-06, "loss": -0.0285, "step": 950 }, { "clip_ratio/high_max": 0.004315058395150118, "clip_ratio/high_mean": 0.0017018895014189184, "clip_ratio/low_mean": 0.0019697889147209935, "clip_ratio/low_min": 2.8280543119763024e-05, "clip_ratio/region_mean": 0.0036716784379677847, "epoch": 4.853893263342083, "grad_norm": 0.10661263018846512, "learning_rate": 1e-06, "loss": -0.0005, "step": 951 }, { "clip_ratio/high_max": 0.004197582638880704, "clip_ratio/high_mean": 0.0016967068913800176, "clip_ratio/low_mean": 0.0016142671956913546, "clip_ratio/low_min": 9.007618064060807e-05, "clip_ratio/region_mean": 0.0033109740979853086, "epoch": 4.858559346748323, "grad_norm": 0.11126602441072464, "learning_rate": 1e-06, "loss": -0.0199, "step": 952 }, { "clip_ratio/high_max": 0.004582642883178778, "clip_ratio/high_mean": 0.0018451784017088357, "clip_ratio/low_mean": 0.0023961000188137405, "clip_ratio/low_min": 0.00010348653813707642, "clip_ratio/region_mean": 0.0042412783950567245, "epoch": 4.863225430154564, "grad_norm": 0.1211385428905487, "learning_rate": 1e-06, "loss": 0.0063, "step": 953 }, { "clip_ratio/high_max": 0.0047500235014013015, "clip_ratio/high_mean": 0.0017259489686693996, "clip_ratio/low_mean": 0.002125600480212597, "clip_ratio/low_min": 0.0001804160656320164, "clip_ratio/region_mean": 0.003851549423416145, "epoch": 4.867891513560805, "grad_norm": 0.11494329571723938, "learning_rate": 1e-06, "loss": 0.0126, "step": 954 }, { "clip_ratio/high_max": 0.0045973342639626935, "clip_ratio/high_mean": 0.0018686926341615617, "clip_ratio/low_mean": 0.001895394267194206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003764086897717789, "epoch": 4.872557596967046, "grad_norm": 0.10718845576047897, "learning_rate": 1e-06, "loss": -0.0283, "step": 955 }, { "clip_ratio/high_max": 0.004053079588629771, "clip_ratio/high_mean": 0.0014844440484012011, "clip_ratio/low_mean": 0.0017039077829394955, "clip_ratio/low_min": 8.639415318612009e-05, "clip_ratio/region_mean": 0.003188351824064739, "epoch": 4.8772236803732865, "grad_norm": 0.10361669957637787, "learning_rate": 1e-06, "loss": -0.0164, "step": 956 }, { "clip_ratio/high_max": 0.004060480059706606, "clip_ratio/high_mean": 0.0017420621479686815, "clip_ratio/low_mean": 0.0017068008855858352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003448863048106432, "epoch": 4.881889763779528, "grad_norm": 0.09397058188915253, "learning_rate": 1e-06, "loss": -0.0035, "step": 957 }, { "clip_ratio/high_max": 0.004708353015303146, "clip_ratio/high_mean": 0.0019292090946692042, "clip_ratio/low_mean": 0.0017499009336461313, "clip_ratio/low_min": 3.354204091010615e-05, "clip_ratio/region_mean": 0.0036791100428672507, "epoch": 4.886555847185768, "grad_norm": 0.837611198425293, "learning_rate": 1e-06, "loss": -0.0253, "step": 958 }, { "clip_ratio/high_max": 0.004463962875888683, "clip_ratio/high_mean": 0.0019200675742467865, "clip_ratio/low_mean": 0.0018050010803563055, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003725068731000647, "epoch": 4.8912219305920095, "grad_norm": 0.10971732437610626, "learning_rate": 1e-06, "loss": -0.0356, "step": 959 }, { "clip_ratio/high_max": 0.004368073656223714, "clip_ratio/high_mean": 0.0017574460252944846, "clip_ratio/low_mean": 0.001854871938121505, "clip_ratio/low_min": 3.480924715404399e-05, "clip_ratio/region_mean": 0.0036123179525020532, "epoch": 4.89588801399825, "grad_norm": 0.10876951366662979, "learning_rate": 1e-06, "loss": 0.0003, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0673130580357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 787.8792114257812, "completions/mean_terminated_length": 549.12841796875, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 5.004666083406241, "grad_norm": 0.15265098214149475, "learning_rate": 1e-06, "loss": -0.0605, "num_tokens": 284643888.0, "reward": 0.6535993814468384, "reward_std": 0.15969029068946838, "rewards/simpleverify_reward/mean": 0.6535993218421936, "rewards/simpleverify_reward/std": 0.4758393168449402, "step": 961 }, { "clip_ratio/high_max": 0.0015449587008333765, "clip_ratio/high_mean": 0.0006328750387183391, "clip_ratio/low_mean": 0.00038651525574096013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001019390307192225, "epoch": 5.009332166812482, "grad_norm": 0.1369791328907013, "learning_rate": 1e-06, "loss": -0.0003, "step": 962 }, { "clip_ratio/high_max": 0.002080726913845865, "clip_ratio/high_mean": 0.0007893530382716563, "clip_ratio/low_mean": 0.00048584852174826665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012752015827572905, "epoch": 5.013998250218723, "grad_norm": 0.16443632543087006, "learning_rate": 1e-06, "loss": -0.0112, "step": 963 }, { "clip_ratio/high_max": 0.002666403310286114, "clip_ratio/high_mean": 0.00095054303528741, "clip_ratio/low_mean": 0.0005697498290828662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015202929062070325, "epoch": 5.0186643336249634, "grad_norm": 0.15811331570148468, "learning_rate": 1e-06, "loss": -0.0324, "step": 964 }, { "clip_ratio/high_max": 0.0020964365467079915, "clip_ratio/high_mean": 0.0008281324171548476, "clip_ratio/low_mean": 0.0006010557517583948, "clip_ratio/low_min": 1.5439723938470706e-05, "clip_ratio/region_mean": 0.001429188203474041, "epoch": 5.023330417031205, "grad_norm": 0.15350686013698578, "learning_rate": 1e-06, "loss": -0.0444, "step": 965 }, { "clip_ratio/high_max": 0.0019612005635281093, "clip_ratio/high_mean": 0.0007759465188428294, "clip_ratio/low_mean": 0.0006645556522926199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014405021938728169, "epoch": 5.027996500437445, "grad_norm": 0.16173428297042847, "learning_rate": 1e-06, "loss": -0.0141, "step": 966 }, { "clip_ratio/high_max": 0.0020538765675155446, "clip_ratio/high_mean": 0.0008916555689211236, "clip_ratio/low_mean": 0.0006211086874827743, "clip_ratio/low_min": 2.3773298380547203e-05, "clip_ratio/region_mean": 0.0015127642800507601, "epoch": 5.0326625838436865, "grad_norm": 0.3055776059627533, "learning_rate": 1e-06, "loss": -0.0427, "step": 967 }, { "clip_ratio/high_max": 0.002021764033997897, "clip_ratio/high_mean": 0.0008895534883777145, "clip_ratio/low_mean": 0.0007763721532683121, "clip_ratio/low_min": 1.604621320439037e-05, "clip_ratio/region_mean": 0.0016659256580169313, "epoch": 5.037328667249927, "grad_norm": 0.15162530541419983, "learning_rate": 1e-06, "loss": -0.0229, "step": 968 }, { "clip_ratio/high_max": 0.0023166202881839126, "clip_ratio/high_mean": 0.0010773405392683344, "clip_ratio/low_mean": 0.0006841995609647711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017615400975046214, "epoch": 5.041994750656168, "grad_norm": 0.15083321928977966, "learning_rate": 1e-06, "loss": -0.0519, "step": 969 }, { "clip_ratio/high_max": 0.002525533207517583, "clip_ratio/high_mean": 0.0009593003469490213, "clip_ratio/low_mean": 0.0007786151818436338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017379155178787187, "epoch": 5.046660834062409, "grad_norm": 0.14743049442768097, "learning_rate": 1e-06, "loss": 0.0032, "step": 970 }, { "clip_ratio/high_max": 0.0021769272279925644, "clip_ratio/high_mean": 0.0008751045024837367, "clip_ratio/low_mean": 0.000754498791138758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016296032808895689, "epoch": 5.05132691746865, "grad_norm": 0.15316613018512726, "learning_rate": 1e-06, "loss": 0.0007, "step": 971 }, { "clip_ratio/high_max": 0.002614977056509815, "clip_ratio/high_mean": 0.0010195441718678921, "clip_ratio/low_mean": 0.0007461757559212856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017657199496170506, "epoch": 5.05599300087489, "grad_norm": 0.1445103883743286, "learning_rate": 1e-06, "loss": -0.0281, "step": 972 }, { "clip_ratio/high_max": 0.002598426493932493, "clip_ratio/high_mean": 0.0011274473254161421, "clip_ratio/low_mean": 0.0007086557998263743, "clip_ratio/low_min": 3.5561879485612735e-05, "clip_ratio/region_mean": 0.0018361031798121985, "epoch": 5.060659084281132, "grad_norm": 0.16397222876548767, "learning_rate": 1e-06, "loss": -0.065, "step": 973 }, { "clip_ratio/high_max": 0.0025484296202193946, "clip_ratio/high_mean": 0.0009887233918561833, "clip_ratio/low_mean": 0.0010092802231156384, "clip_ratio/low_min": 1.4702422959089745e-05, "clip_ratio/region_mean": 0.001998003637709189, "epoch": 5.065325167687372, "grad_norm": 0.15653736889362335, "learning_rate": 1e-06, "loss": 0.0044, "step": 974 }, { "clip_ratio/high_max": 0.0026231064257444814, "clip_ratio/high_mean": 0.0012304056035645772, "clip_ratio/low_mean": 0.0009291319292970002, "clip_ratio/low_min": 2.3561542548122816e-05, "clip_ratio/region_mean": 0.002159537500119768, "epoch": 5.069991251093613, "grad_norm": 0.1562759131193161, "learning_rate": 1e-06, "loss": -0.0305, "step": 975 }, { "clip_ratio/high_max": 0.00226770499284612, "clip_ratio/high_mean": 0.0009163102477032226, "clip_ratio/low_mean": 0.0009694444834167371, "clip_ratio/low_min": 3.327051854284946e-05, "clip_ratio/region_mean": 0.0018857547365769278, "epoch": 5.074657334499854, "grad_norm": 0.1452087163925171, "learning_rate": 1e-06, "loss": 0.009, "step": 976 }, { "clip_ratio/high_max": 0.005008928033930715, "clip_ratio/high_mean": 0.0020603489101631567, "clip_ratio/low_mean": 0.0016285567744489526, "clip_ratio/low_min": 4.883575456915423e-05, "clip_ratio/region_mean": 0.003688905722810887, "epoch": 5.079323417906095, "grad_norm": 0.10370133817195892, "learning_rate": 1e-06, "loss": -0.0613, "step": 977 }, { "clip_ratio/high_max": 0.0036495929598459043, "clip_ratio/high_mean": 0.0015194915413303534, "clip_ratio/low_mean": 0.0015842710636206903, "clip_ratio/low_min": 0.00014450841081270482, "clip_ratio/region_mean": 0.003103762515820563, "epoch": 5.083989501312336, "grad_norm": 0.1010274812579155, "learning_rate": 1e-06, "loss": -0.001, "step": 978 }, { "clip_ratio/high_max": 0.0047291311420849524, "clip_ratio/high_mean": 0.0018774631898850203, "clip_ratio/low_mean": 0.0018876308276958298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037650940648745745, "epoch": 5.088655584718577, "grad_norm": 0.10905813425779343, "learning_rate": 1e-06, "loss": -0.0121, "step": 979 }, { "clip_ratio/high_max": 0.0047671905340394005, "clip_ratio/high_mean": 0.0019036019148188643, "clip_ratio/low_mean": 0.0018992231380252633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038028250564821064, "epoch": 5.093321668124818, "grad_norm": 0.11359624564647675, "learning_rate": 1e-06, "loss": -0.0333, "step": 980 }, { "clip_ratio/high_max": 0.00482658707187511, "clip_ratio/high_mean": 0.0017627755369176157, "clip_ratio/low_mean": 0.0017244030241272412, "clip_ratio/low_min": 7.458234176738188e-05, "clip_ratio/region_mean": 0.0034871784882852808, "epoch": 5.0979877515310585, "grad_norm": 0.11160120368003845, "learning_rate": 1e-06, "loss": -0.0453, "step": 981 }, { "clip_ratio/high_max": 0.004728345738840289, "clip_ratio/high_mean": 0.0018364019997534342, "clip_ratio/low_mean": 0.0017556422608322464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035920441732741892, "epoch": 5.1026538349373, "grad_norm": 0.10930287092924118, "learning_rate": 1e-06, "loss": -0.015, "step": 982 }, { "clip_ratio/high_max": 0.004754893736389931, "clip_ratio/high_mean": 0.0018541152639954817, "clip_ratio/low_mean": 0.0016714120574761182, "clip_ratio/low_min": 8.838607027428225e-05, "clip_ratio/region_mean": 0.0035255272887297906, "epoch": 5.10731991834354, "grad_norm": 0.11352820694446564, "learning_rate": 1e-06, "loss": -0.0436, "step": 983 }, { "clip_ratio/high_max": 0.004237971021211706, "clip_ratio/high_mean": 0.0018223492006654851, "clip_ratio/low_mean": 0.0018080762565659825, "clip_ratio/low_min": 0.00011208964497200213, "clip_ratio/region_mean": 0.0036304254026617855, "epoch": 5.111986001749782, "grad_norm": 0.10900255292654037, "learning_rate": 1e-06, "loss": -0.0237, "step": 984 }, { "clip_ratio/high_max": 0.004388932880829088, "clip_ratio/high_mean": 0.0020051638675795402, "clip_ratio/low_mean": 0.0015720104820502456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035771743714576587, "epoch": 5.116652085156022, "grad_norm": 0.1130501925945282, "learning_rate": 1e-06, "loss": -0.0528, "step": 985 }, { "clip_ratio/high_max": 0.004771657433593646, "clip_ratio/high_mean": 0.0018092952741426416, "clip_ratio/low_mean": 0.0019510745405568741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003760369843803346, "epoch": 5.121318168562263, "grad_norm": 0.10710525512695312, "learning_rate": 1e-06, "loss": 0.0023, "step": 986 }, { "clip_ratio/high_max": 0.004259601439116523, "clip_ratio/high_mean": 0.0016855404355737846, "clip_ratio/low_mean": 0.0017088917084038258, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033944321985472925, "epoch": 5.125984251968504, "grad_norm": 0.10002481192350388, "learning_rate": 1e-06, "loss": -0.0001, "step": 987 }, { "clip_ratio/high_max": 0.00473655374662485, "clip_ratio/high_mean": 0.0018687648516788613, "clip_ratio/low_mean": 0.001634989010199206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003503753847326152, "epoch": 5.130650335374745, "grad_norm": 0.10728838294744492, "learning_rate": 1e-06, "loss": -0.0289, "step": 988 }, { "clip_ratio/high_max": 0.005118130226037465, "clip_ratio/high_mean": 0.002142934983567102, "clip_ratio/low_mean": 0.0016328284436895046, "clip_ratio/low_min": 7.112375897122547e-05, "clip_ratio/region_mean": 0.003775763470912352, "epoch": 5.135316418780985, "grad_norm": 0.11420293897390366, "learning_rate": 1e-06, "loss": -0.066, "step": 989 }, { "clip_ratio/high_max": 0.0044603657006518915, "clip_ratio/high_mean": 0.0018084128423652146, "clip_ratio/low_mean": 0.0020731912227347493, "clip_ratio/low_min": 3.426535113248974e-05, "clip_ratio/region_mean": 0.0038816040760139003, "epoch": 5.139982502187227, "grad_norm": 0.11369369924068451, "learning_rate": 1e-06, "loss": 0.0034, "step": 990 }, { "clip_ratio/high_max": 0.004287930874852464, "clip_ratio/high_mean": 0.0019494573280098848, "clip_ratio/low_mean": 0.002007729548495263, "clip_ratio/low_min": 6.232415398699231e-05, "clip_ratio/region_mean": 0.003957186854677275, "epoch": 5.144648585593467, "grad_norm": 0.11844845861196518, "learning_rate": 1e-06, "loss": -0.0315, "step": 991 }, { "clip_ratio/high_max": 0.003953901003114879, "clip_ratio/high_mean": 0.0016898707726795692, "clip_ratio/low_mean": 0.0020398725173436105, "clip_ratio/low_min": 0.00010927770199486986, "clip_ratio/region_mean": 0.0037297434464562684, "epoch": 5.1493146689997085, "grad_norm": 0.11048706620931625, "learning_rate": 1e-06, "loss": 0.0082, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0729631696428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 819.5020141601562, "completions/mean_terminated_length": 561.62255859375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 5.153980752405949, "grad_norm": 0.17419984936714172, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 293529060.0, "reward": 0.638741672039032, "reward_std": 0.1680195927619934, "rewards/simpleverify_reward/mean": 0.6387416124343872, "rewards/simpleverify_reward/std": 0.4803819954395294, "step": 993 }, { "clip_ratio/high_max": 0.002075881082419073, "clip_ratio/high_mean": 0.0008263601012004074, "clip_ratio/low_mean": 0.0005146906523805228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001341050759947393, "epoch": 5.15864683581219, "grad_norm": 0.15556688606739044, "learning_rate": 1e-06, "loss": -0.0246, "step": 994 }, { "clip_ratio/high_max": 0.002147895091184182, "clip_ratio/high_mean": 0.0009027577998494962, "clip_ratio/low_mean": 0.0004864289367105812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013891867456550244, "epoch": 5.163312919218431, "grad_norm": 0.1463095247745514, "learning_rate": 1e-06, "loss": -0.0561, "step": 995 }, { "clip_ratio/high_max": 0.0020475842684390955, "clip_ratio/high_mean": 0.0007753284135105787, "clip_ratio/low_mean": 0.0006058864819351584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001381214904540684, "epoch": 5.167979002624672, "grad_norm": 0.16062968969345093, "learning_rate": 1e-06, "loss": -0.0059, "step": 996 }, { "clip_ratio/high_max": 0.0023307400806515943, "clip_ratio/high_mean": 0.0008343964818777749, "clip_ratio/low_mean": 0.0005391193506056879, "clip_ratio/low_min": 1.2852148756792303e-05, "clip_ratio/region_mean": 0.0013735158354393207, "epoch": 5.172645086030913, "grad_norm": 0.1553787887096405, "learning_rate": 1e-06, "loss": -0.046, "step": 997 }, { "clip_ratio/high_max": 0.0022699056935380213, "clip_ratio/high_mean": 0.0009314027211075881, "clip_ratio/low_mean": 0.0005514473541552434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014828500752628315, "epoch": 5.177311169437154, "grad_norm": 0.1535441279411316, "learning_rate": 1e-06, "loss": -0.0533, "step": 998 }, { "clip_ratio/high_max": 0.0021797854351461865, "clip_ratio/high_mean": 0.000865523954416858, "clip_ratio/low_mean": 0.0005951901521257241, "clip_ratio/low_min": 1.8882175936596468e-05, "clip_ratio/region_mean": 0.001460714076529257, "epoch": 5.181977252843395, "grad_norm": 0.1486525535583496, "learning_rate": 1e-06, "loss": -0.0368, "step": 999 }, { "clip_ratio/high_max": 0.001936640423082281, "clip_ratio/high_mean": 0.0007841346305212937, "clip_ratio/low_mean": 0.0006156760136946104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013998106733197346, "epoch": 5.186643336249635, "grad_norm": 0.12503276765346527, "learning_rate": 1e-06, "loss": -0.0421, "step": 1000 }, { "clip_ratio/high_max": 0.0020422697707545012, "clip_ratio/high_mean": 0.0008638777890155325, "clip_ratio/low_mean": 0.0008741974161239341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00173807522514835, "epoch": 5.191309419655877, "grad_norm": 0.16644065082073212, "learning_rate": 1e-06, "loss": 0.0336, "step": 1001 }, { "clip_ratio/high_max": 0.0027344133777660318, "clip_ratio/high_mean": 0.0011106047677458264, "clip_ratio/low_mean": 0.0006984382453083526, "clip_ratio/low_min": 4.552203245111741e-05, "clip_ratio/region_mean": 0.001809042994864285, "epoch": 5.195975503062117, "grad_norm": 0.1430450826883316, "learning_rate": 1e-06, "loss": -0.0494, "step": 1002 }, { "clip_ratio/high_max": 0.002473121763614472, "clip_ratio/high_mean": 0.0010891216006712057, "clip_ratio/low_mean": 0.0008091115796560189, "clip_ratio/low_min": 2.7789889827545267e-05, "clip_ratio/region_mean": 0.001898233160318341, "epoch": 5.200641586468358, "grad_norm": 0.15866203606128693, "learning_rate": 1e-06, "loss": -0.0535, "step": 1003 }, { "clip_ratio/high_max": 0.002856684586731717, "clip_ratio/high_mean": 0.0009380237552250037, "clip_ratio/low_mean": 0.0008077421898633474, "clip_ratio/low_min": 2.752552245510742e-05, "clip_ratio/region_mean": 0.0017457659414503723, "epoch": 5.205307669874599, "grad_norm": 0.15228678286075592, "learning_rate": 1e-06, "loss": -0.0054, "step": 1004 }, { "clip_ratio/high_max": 0.002359831109060906, "clip_ratio/high_mean": 0.0010075241279992042, "clip_ratio/low_mean": 0.0008300671506731305, "clip_ratio/low_min": 1.8662287402548827e-05, "clip_ratio/region_mean": 0.0018375912695773877, "epoch": 5.20997375328084, "grad_norm": 0.19849438965320587, "learning_rate": 1e-06, "loss": 0.0139, "step": 1005 }, { "clip_ratio/high_max": 0.002412451616692124, "clip_ratio/high_mean": 0.0009395330635015853, "clip_ratio/low_mean": 0.0010428279365441995, "clip_ratio/low_min": 1.710454307612963e-05, "clip_ratio/region_mean": 0.0019823610127787106, "epoch": 5.2146398366870805, "grad_norm": 0.17778748273849487, "learning_rate": 1e-06, "loss": -0.0024, "step": 1006 }, { "clip_ratio/high_max": 0.002688764285267098, "clip_ratio/high_mean": 0.0011151650487590814, "clip_ratio/low_mean": 0.0008966112636699108, "clip_ratio/low_min": 4.430529224919155e-05, "clip_ratio/region_mean": 0.0020117762323934585, "epoch": 5.219305920093322, "grad_norm": 0.1618351936340332, "learning_rate": 1e-06, "loss": -0.0314, "step": 1007 }, { "clip_ratio/high_max": 0.002520723966881633, "clip_ratio/high_mean": 0.0010178906759392703, "clip_ratio/low_mean": 0.0008467458392260596, "clip_ratio/low_min": 4.185410580248572e-05, "clip_ratio/region_mean": 0.0018646365206222981, "epoch": 5.223972003499562, "grad_norm": 0.14499059319496155, "learning_rate": 1e-06, "loss": -0.0371, "step": 1008 }, { "clip_ratio/high_max": 0.0055825999297667295, "clip_ratio/high_mean": 0.002216798107838258, "clip_ratio/low_mean": 0.0020892682441626675, "clip_ratio/low_min": 0.0002328925475012511, "clip_ratio/region_mean": 0.004306066344724968, "epoch": 5.228638086905804, "grad_norm": 0.11005309969186783, "learning_rate": 1e-06, "loss": -0.0417, "step": 1009 }, { "clip_ratio/high_max": 0.004347499430878088, "clip_ratio/high_mean": 0.0017518723761895671, "clip_ratio/low_mean": 0.0017631687187531497, "clip_ratio/low_min": 1.0877131899178494e-05, "clip_ratio/region_mean": 0.003515041171340272, "epoch": 5.233304170312044, "grad_norm": 0.1101030558347702, "learning_rate": 1e-06, "loss": -0.0255, "step": 1010 }, { "clip_ratio/high_max": 0.0043478384031914175, "clip_ratio/high_mean": 0.0018308321341464762, "clip_ratio/low_mean": 0.0015104327831068076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033412650373065844, "epoch": 5.237970253718285, "grad_norm": 0.10577068477869034, "learning_rate": 1e-06, "loss": -0.0568, "step": 1011 }, { "clip_ratio/high_max": 0.004228475634590723, "clip_ratio/high_mean": 0.001662406968534924, "clip_ratio/low_mean": 0.001927349188918015, "clip_ratio/low_min": 5.152514495421201e-05, "clip_ratio/region_mean": 0.0035897560883313417, "epoch": 5.242636337124526, "grad_norm": 0.11040983349084854, "learning_rate": 1e-06, "loss": -0.0068, "step": 1012 }, { "clip_ratio/high_max": 0.004759146053402219, "clip_ratio/high_mean": 0.0018668192642508075, "clip_ratio/low_mean": 0.001728527121940715, "clip_ratio/low_min": 9.341413169750012e-05, "clip_ratio/region_mean": 0.003595346410293132, "epoch": 5.247302420530767, "grad_norm": 0.11569134891033173, "learning_rate": 1e-06, "loss": -0.047, "step": 1013 }, { "clip_ratio/high_max": 0.004707593601779081, "clip_ratio/high_mean": 0.001989219083043281, "clip_ratio/low_mean": 0.001605293608008651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003594512731069699, "epoch": 5.251968503937007, "grad_norm": 0.11057382822036743, "learning_rate": 1e-06, "loss": -0.0542, "step": 1014 }, { "clip_ratio/high_max": 0.004848193901125342, "clip_ratio/high_mean": 0.001878933770058211, "clip_ratio/low_mean": 0.0016232934976869728, "clip_ratio/low_min": 9.441087604500353e-05, "clip_ratio/region_mean": 0.003502227205899544, "epoch": 5.256634587343249, "grad_norm": 0.10582774132490158, "learning_rate": 1e-06, "loss": -0.0375, "step": 1015 }, { "clip_ratio/high_max": 0.004101810191059485, "clip_ratio/high_mean": 0.001611021340067964, "clip_ratio/low_mean": 0.0015275996047421359, "clip_ratio/low_min": 3.7821482692379504e-05, "clip_ratio/region_mean": 0.0031386208938783966, "epoch": 5.26130067074949, "grad_norm": 0.09692647308111191, "learning_rate": 1e-06, "loss": -0.0428, "step": 1016 }, { "clip_ratio/high_max": 0.004056684148963541, "clip_ratio/high_mean": 0.0016346950651495717, "clip_ratio/low_mean": 0.002188072379794903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038227673794608563, "epoch": 5.2659667541557305, "grad_norm": 0.12245281040668488, "learning_rate": 1e-06, "loss": 0.0326, "step": 1017 }, { "clip_ratio/high_max": 0.004675952921388671, "clip_ratio/high_mean": 0.0020794166302948724, "clip_ratio/low_mean": 0.0016500580404681386, "clip_ratio/low_min": 0.00014556574751622975, "clip_ratio/region_mean": 0.003729474628926255, "epoch": 5.270632837561972, "grad_norm": 0.10504581034183502, "learning_rate": 1e-06, "loss": -0.0502, "step": 1018 }, { "clip_ratio/high_max": 0.00457784635364078, "clip_ratio/high_mean": 0.002114764240104705, "clip_ratio/low_mean": 0.001816475563828135, "clip_ratio/low_min": 0.0001811290203477256, "clip_ratio/region_mean": 0.003931239814846776, "epoch": 5.275298920968212, "grad_norm": 0.11754226684570312, "learning_rate": 1e-06, "loss": -0.0544, "step": 1019 }, { "clip_ratio/high_max": 0.005019220378017053, "clip_ratio/high_mean": 0.0017417570961697493, "clip_ratio/low_mean": 0.0017731974403432105, "clip_ratio/low_min": 5.505104491021484e-05, "clip_ratio/region_mean": 0.0035149546019965783, "epoch": 5.2799650043744535, "grad_norm": 0.10802837461233139, "learning_rate": 1e-06, "loss": -0.0062, "step": 1020 }, { "clip_ratio/high_max": 0.0049049765220843256, "clip_ratio/high_mean": 0.0018938581051770598, "clip_ratio/low_mean": 0.002084886218653992, "clip_ratio/low_min": 4.9710111852618866e-05, "clip_ratio/region_mean": 0.003978744367486797, "epoch": 5.284631087780694, "grad_norm": 0.12198702245950699, "learning_rate": 1e-06, "loss": 0.0127, "step": 1021 }, { "clip_ratio/high_max": 0.004180621843261179, "clip_ratio/high_mean": 0.0016994495381368324, "clip_ratio/low_mean": 0.002120194199960679, "clip_ratio/low_min": 0.00022694712606607936, "clip_ratio/region_mean": 0.003819643723545596, "epoch": 5.289297171186935, "grad_norm": 0.12182199209928513, "learning_rate": 1e-06, "loss": -0.0035, "step": 1022 }, { "clip_ratio/high_max": 0.005215120690991171, "clip_ratio/high_mean": 0.002139085489034187, "clip_ratio/low_mean": 0.0019649441965157166, "clip_ratio/low_min": 4.937783887726255e-05, "clip_ratio/region_mean": 0.004104029692825861, "epoch": 5.293963254593176, "grad_norm": 0.12089893966913223, "learning_rate": 1e-06, "loss": -0.0325, "step": 1023 }, { "clip_ratio/high_max": 0.00402004765055608, "clip_ratio/high_mean": 0.001815882667870028, "clip_ratio/low_mean": 0.0017658336619206239, "clip_ratio/low_min": 0.00011170033394591883, "clip_ratio/region_mean": 0.003581716286134906, "epoch": 5.298629337999417, "grad_norm": 0.1064288318157196, "learning_rate": 1e-06, "loss": -0.038, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703822544642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 796.1515502929688, "completions/mean_terminated_length": 546.3167724609375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 5.303295421405657, "grad_norm": 0.16900043189525604, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 302235488.0, "reward": 0.64453125, "reward_std": 0.15173809230327606, "rewards/simpleverify_reward/mean": 0.64453125, "rewards/simpleverify_reward/std": 0.4786717891693115, "step": 1025 }, { "clip_ratio/high_max": 0.0019901058039977215, "clip_ratio/high_mean": 0.0007580732817586977, "clip_ratio/low_mean": 0.0004407145856930583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011987878679065034, "epoch": 5.307961504811899, "grad_norm": 0.30315497517585754, "learning_rate": 1e-06, "loss": -0.0187, "step": 1026 }, { "clip_ratio/high_max": 0.0017779778499971144, "clip_ratio/high_mean": 0.0007188380077423062, "clip_ratio/low_mean": 0.00045006515028944705, "clip_ratio/low_min": 1.700217580946628e-05, "clip_ratio/region_mean": 0.0011689031598507427, "epoch": 5.312627588218139, "grad_norm": 0.13995777070522308, "learning_rate": 1e-06, "loss": -0.0235, "step": 1027 }, { "clip_ratio/high_max": 0.0018832266214303672, "clip_ratio/high_mean": 0.0007336670914810384, "clip_ratio/low_mean": 0.0004941793376929127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012278464309929404, "epoch": 5.31729367162438, "grad_norm": 0.14066806435585022, "learning_rate": 1e-06, "loss": -0.0408, "step": 1028 }, { "clip_ratio/high_max": 0.0020801038554054685, "clip_ratio/high_mean": 0.0008948110171331791, "clip_ratio/low_mean": 0.0005780501096523949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014728611458849628, "epoch": 5.321959755030621, "grad_norm": 0.14191918075084686, "learning_rate": 1e-06, "loss": -0.017, "step": 1029 }, { "clip_ratio/high_max": 0.0019493888903525658, "clip_ratio/high_mean": 0.0008005459312698804, "clip_ratio/low_mean": 0.0005298624591887346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013304083731782157, "epoch": 5.326625838436862, "grad_norm": 0.1472458392381668, "learning_rate": 1e-06, "loss": -0.0328, "step": 1030 }, { "clip_ratio/high_max": 0.0022974872917984612, "clip_ratio/high_mean": 0.0008544014235667419, "clip_ratio/low_mean": 0.0005862041334694368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014406055270228535, "epoch": 5.331291921843103, "grad_norm": 0.14370617270469666, "learning_rate": 1e-06, "loss": -0.0494, "step": 1031 }, { "clip_ratio/high_max": 0.00222120506077772, "clip_ratio/high_mean": 0.0008841853414196521, "clip_ratio/low_mean": 0.0007317423262520606, "clip_ratio/low_min": 5.127830445417203e-05, "clip_ratio/region_mean": 0.001615927649254445, "epoch": 5.335958005249344, "grad_norm": 0.164176344871521, "learning_rate": 1e-06, "loss": 0.0062, "step": 1032 }, { "clip_ratio/high_max": 0.001983374317205744, "clip_ratio/high_mean": 0.0008687130357429851, "clip_ratio/low_mean": 0.0006822798013672582, "clip_ratio/low_min": 2.1208008547546342e-05, "clip_ratio/region_mean": 0.001550992819829844, "epoch": 5.340624088655585, "grad_norm": 0.15979741513729095, "learning_rate": 1e-06, "loss": -0.0045, "step": 1033 }, { "clip_ratio/high_max": 0.002303304754605051, "clip_ratio/high_mean": 0.0009059564308699919, "clip_ratio/low_mean": 0.0005827184099871374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014886748540448025, "epoch": 5.3452901720618256, "grad_norm": 0.17692916095256805, "learning_rate": 1e-06, "loss": -0.0234, "step": 1034 }, { "clip_ratio/high_max": 0.0019705890881596133, "clip_ratio/high_mean": 0.0007961713872646214, "clip_ratio/low_mean": 0.0007159205924835987, "clip_ratio/low_min": 4.466944665182382e-05, "clip_ratio/region_mean": 0.0015120919997571036, "epoch": 5.349956255468067, "grad_norm": 0.12925322353839874, "learning_rate": 1e-06, "loss": -0.0375, "step": 1035 }, { "clip_ratio/high_max": 0.002061121122096665, "clip_ratio/high_mean": 0.000853650688441121, "clip_ratio/low_mean": 0.0007101527780832839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015638034747098573, "epoch": 5.354622338874307, "grad_norm": 0.1644068956375122, "learning_rate": 1e-06, "loss": -0.0292, "step": 1036 }, { "clip_ratio/high_max": 0.002102839069266338, "clip_ratio/high_mean": 0.0009394999797223136, "clip_ratio/low_mean": 0.0007577474707431975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016972474550129846, "epoch": 5.359288422280549, "grad_norm": 0.18274541199207306, "learning_rate": 1e-06, "loss": -0.0273, "step": 1037 }, { "clip_ratio/high_max": 0.0029189602355472744, "clip_ratio/high_mean": 0.0010747261840151623, "clip_ratio/low_mean": 0.0007616193743160693, "clip_ratio/low_min": 4.585473288898356e-05, "clip_ratio/region_mean": 0.0018363455383223481, "epoch": 5.363954505686789, "grad_norm": 0.13675089180469513, "learning_rate": 1e-06, "loss": -0.0172, "step": 1038 }, { "clip_ratio/high_max": 0.002146710750821512, "clip_ratio/high_mean": 0.0009420935148227727, "clip_ratio/low_mean": 0.0008759860538702924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001818079577788012, "epoch": 5.36862058909303, "grad_norm": 0.1734006553888321, "learning_rate": 1e-06, "loss": -0.0138, "step": 1039 }, { "clip_ratio/high_max": 0.0025304565497208387, "clip_ratio/high_mean": 0.0008993535375338979, "clip_ratio/low_mean": 0.0008891449360817205, "clip_ratio/low_min": 1.2685203728324268e-05, "clip_ratio/region_mean": 0.0017884984699776396, "epoch": 5.373286672499271, "grad_norm": 0.15388691425323486, "learning_rate": 1e-06, "loss": -0.0048, "step": 1040 }, { "clip_ratio/high_max": 0.004619701052433811, "clip_ratio/high_mean": 0.0019604874978540465, "clip_ratio/low_mean": 0.0015945161358104087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003555003640940413, "epoch": 5.377952755905512, "grad_norm": 0.09656365215778351, "learning_rate": 1e-06, "loss": -0.0415, "step": 1041 }, { "clip_ratio/high_max": 0.004308526469685603, "clip_ratio/high_mean": 0.0017046349639713299, "clip_ratio/low_mean": 0.0015700769690738525, "clip_ratio/low_min": 0.00014946940791560337, "clip_ratio/region_mean": 0.0032747119257692248, "epoch": 5.3826188393117524, "grad_norm": 0.10604587197303772, "learning_rate": 1e-06, "loss": -0.0196, "step": 1042 }, { "clip_ratio/high_max": 0.004013026918983087, "clip_ratio/high_mean": 0.001673447659413796, "clip_ratio/low_mean": 0.0017098265161621384, "clip_ratio/low_min": 8.573388186050579e-05, "clip_ratio/region_mean": 0.0033832742337835953, "epoch": 5.387284922717994, "grad_norm": 0.10498189926147461, "learning_rate": 1e-06, "loss": -0.0243, "step": 1043 }, { "clip_ratio/high_max": 0.004044265740958508, "clip_ratio/high_mean": 0.0016807025749585591, "clip_ratio/low_mean": 0.0016275965026579797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003308299033960793, "epoch": 5.391951006124234, "grad_norm": 0.0970829576253891, "learning_rate": 1e-06, "loss": -0.0416, "step": 1044 }, { "clip_ratio/high_max": 0.004686776621383615, "clip_ratio/high_mean": 0.0018879604613175616, "clip_ratio/low_mean": 0.0016483919480378972, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035363523493288085, "epoch": 5.3966170895304755, "grad_norm": 0.10536900907754898, "learning_rate": 1e-06, "loss": -0.0177, "step": 1045 }, { "clip_ratio/high_max": 0.004191796950181015, "clip_ratio/high_mean": 0.0017523608075862285, "clip_ratio/low_mean": 0.0016774851937952917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003429846008657478, "epoch": 5.401283172936716, "grad_norm": 0.09806672483682632, "learning_rate": 1e-06, "loss": -0.0336, "step": 1046 }, { "clip_ratio/high_max": 0.005034724992583506, "clip_ratio/high_mean": 0.0018981661414727569, "clip_ratio/low_mean": 0.0014747300283488585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00337289618619252, "epoch": 5.405949256342957, "grad_norm": 0.1024920791387558, "learning_rate": 1e-06, "loss": -0.0502, "step": 1047 }, { "clip_ratio/high_max": 0.004791682600625791, "clip_ratio/high_mean": 0.0018399969194433652, "clip_ratio/low_mean": 0.0019361706745257834, "clip_ratio/low_min": 0.00012376033919281326, "clip_ratio/region_mean": 0.00377616765035782, "epoch": 5.410615339749198, "grad_norm": 0.115408755838871, "learning_rate": 1e-06, "loss": 0.0052, "step": 1048 }, { "clip_ratio/high_max": 0.004598794512276072, "clip_ratio/high_mean": 0.0017598953018023167, "clip_ratio/low_mean": 0.0018190212904301006, "clip_ratio/low_min": 2.1208008547546342e-05, "clip_ratio/region_mean": 0.003578916519472841, "epoch": 5.415281423155439, "grad_norm": 0.10779279470443726, "learning_rate": 1e-06, "loss": -0.0054, "step": 1049 }, { "clip_ratio/high_max": 0.004387183973449282, "clip_ratio/high_mean": 0.0018157201557187364, "clip_ratio/low_mean": 0.0016432555639767088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003458975683315657, "epoch": 5.41994750656168, "grad_norm": 0.11029481142759323, "learning_rate": 1e-06, "loss": -0.0245, "step": 1050 }, { "clip_ratio/high_max": 0.004310808617447037, "clip_ratio/high_mean": 0.0016411887845606543, "clip_ratio/low_mean": 0.0015889382902969373, "clip_ratio/low_min": 6.700416997773573e-05, "clip_ratio/region_mean": 0.00323012717126403, "epoch": 5.424613589967921, "grad_norm": 0.1090804859995842, "learning_rate": 1e-06, "loss": -0.0382, "step": 1051 }, { "clip_ratio/high_max": 0.004544772586086765, "clip_ratio/high_mean": 0.001863996196334483, "clip_ratio/low_mean": 0.001818540484237019, "clip_ratio/low_min": 5.2065977797610685e-05, "clip_ratio/region_mean": 0.003682536698761396, "epoch": 5.429279673374162, "grad_norm": 0.10998934507369995, "learning_rate": 1e-06, "loss": -0.0302, "step": 1052 }, { "clip_ratio/high_max": 0.004446835315320641, "clip_ratio/high_mean": 0.0018928991667053197, "clip_ratio/low_mean": 0.0019031375068152556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037960366753395647, "epoch": 5.433945756780402, "grad_norm": 0.11447273939847946, "learning_rate": 1e-06, "loss": -0.0283, "step": 1053 }, { "clip_ratio/high_max": 0.004872256686212495, "clip_ratio/high_mean": 0.0018683382477320265, "clip_ratio/low_mean": 0.0017378241464029998, "clip_ratio/low_min": 8.043758134590462e-05, "clip_ratio/region_mean": 0.0036061624414287508, "epoch": 5.438611840186644, "grad_norm": 0.10243591666221619, "learning_rate": 1e-06, "loss": -0.018, "step": 1054 }, { "clip_ratio/high_max": 0.0048987017798935995, "clip_ratio/high_mean": 0.0018398495521978475, "clip_ratio/low_mean": 0.0019785495933319908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003818399185547605, "epoch": 5.443277923592884, "grad_norm": 0.11500147730112076, "learning_rate": 1e-06, "loss": -0.0148, "step": 1055 }, { "clip_ratio/high_max": 0.004209175094729289, "clip_ratio/high_mean": 0.0016091649413283449, "clip_ratio/low_mean": 0.001991480603464879, "clip_ratio/low_min": 3.8055612094467506e-05, "clip_ratio/region_mean": 0.0036006455484312028, "epoch": 5.447944006999125, "grad_norm": 0.09771430492401123, "learning_rate": 1e-06, "loss": -0.0055, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 844.0969848632812, "completions/mean_terminated_length": 553.5013427734375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 5.452610090405366, "grad_norm": 0.18503591418266296, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 310969310.0, "reward": 0.6193498969078064, "reward_std": 0.16954688727855682, "rewards/simpleverify_reward/mean": 0.6193498969078064, "rewards/simpleverify_reward/std": 0.48556363582611084, "step": 1057 }, { "clip_ratio/high_max": 0.0021010026830481365, "clip_ratio/high_mean": 0.0008167638879967853, "clip_ratio/low_mean": 0.0005597656472673407, "clip_ratio/low_min": 3.10698133034748e-05, "clip_ratio/region_mean": 0.001376529544359073, "epoch": 5.457276173811607, "grad_norm": 0.15392066538333893, "learning_rate": 1e-06, "loss": -0.0204, "step": 1058 }, { "clip_ratio/high_max": 0.002218676880147541, "clip_ratio/high_mean": 0.0008808757229417097, "clip_ratio/low_mean": 0.00044940638440493785, "clip_ratio/low_min": 1.230557154485723e-05, "clip_ratio/region_mean": 0.0013302821207616944, "epoch": 5.4619422572178475, "grad_norm": 0.14625273644924164, "learning_rate": 1e-06, "loss": -0.0445, "step": 1059 }, { "clip_ratio/high_max": 0.0019228427627240308, "clip_ratio/high_mean": 0.0008116791414067848, "clip_ratio/low_mean": 0.0004936997529512155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013053789080004208, "epoch": 5.466608340624089, "grad_norm": 0.18353533744812012, "learning_rate": 1e-06, "loss": -0.0338, "step": 1060 }, { "clip_ratio/high_max": 0.00246318471909035, "clip_ratio/high_mean": 0.0009007052176457364, "clip_ratio/low_mean": 0.0005558047769227414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001456509988202015, "epoch": 5.471274424030329, "grad_norm": 0.15294893085956573, "learning_rate": 1e-06, "loss": -0.038, "step": 1061 }, { "clip_ratio/high_max": 0.002179118077037856, "clip_ratio/high_mean": 0.0009593284976290306, "clip_ratio/low_mean": 0.0006045227364666061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001563851212267764, "epoch": 5.475940507436571, "grad_norm": 0.16398212313652039, "learning_rate": 1e-06, "loss": -0.0505, "step": 1062 }, { "clip_ratio/high_max": 0.0023751916523906402, "clip_ratio/high_mean": 0.0009796846388780978, "clip_ratio/low_mean": 0.0007294433135029976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017091279478336219, "epoch": 5.480606590842811, "grad_norm": 0.17016813158988953, "learning_rate": 1e-06, "loss": -0.019, "step": 1063 }, { "clip_ratio/high_max": 0.002340564598853234, "clip_ratio/high_mean": 0.0009789278356038267, "clip_ratio/low_mean": 0.0008047365790844196, "clip_ratio/low_min": 2.706799568841234e-05, "clip_ratio/region_mean": 0.0017836644183262251, "epoch": 5.485272674249052, "grad_norm": 0.1788434386253357, "learning_rate": 1e-06, "loss": -0.0424, "step": 1064 }, { "clip_ratio/high_max": 0.0021957421995466575, "clip_ratio/high_mean": 0.0009893622482195497, "clip_ratio/low_mean": 0.0007605226783198304, "clip_ratio/low_min": 1.6037978639360517e-05, "clip_ratio/region_mean": 0.0017498849192634225, "epoch": 5.489938757655293, "grad_norm": 0.1769745945930481, "learning_rate": 1e-06, "loss": -0.0257, "step": 1065 }, { "clip_ratio/high_max": 0.002496462398994481, "clip_ratio/high_mean": 0.0009307483560405672, "clip_ratio/low_mean": 0.0006463490763053414, "clip_ratio/low_min": 2.1570318494923413e-05, "clip_ratio/region_mean": 0.0015770974387123715, "epoch": 5.494604841061534, "grad_norm": 0.14289164543151855, "learning_rate": 1e-06, "loss": -0.0413, "step": 1066 }, { "clip_ratio/high_max": 0.0019630641108960845, "clip_ratio/high_mean": 0.0008781963006185833, "clip_ratio/low_mean": 0.0009712367609608918, "clip_ratio/low_min": 3.799853766395245e-05, "clip_ratio/region_mean": 0.0018494330797693692, "epoch": 5.499270924467774, "grad_norm": 0.15272675454616547, "learning_rate": 1e-06, "loss": 0.022, "step": 1067 }, { "clip_ratio/high_max": 0.002803758798108902, "clip_ratio/high_mean": 0.0011310019144730177, "clip_ratio/low_mean": 0.0007846928119761287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019156947018927895, "epoch": 5.503937007874016, "grad_norm": 0.14283253252506256, "learning_rate": 1e-06, "loss": -0.0268, "step": 1068 }, { "clip_ratio/high_max": 0.002550644741859287, "clip_ratio/high_mean": 0.0011323581566102803, "clip_ratio/low_mean": 0.000811888476164313, "clip_ratio/low_min": 2.7162102924194187e-05, "clip_ratio/region_mean": 0.0019442466073087417, "epoch": 5.508603091280257, "grad_norm": 0.18627505004405975, "learning_rate": 1e-06, "loss": -0.0231, "step": 1069 }, { "clip_ratio/high_max": 0.0024651432540849783, "clip_ratio/high_mean": 0.0010573405561444815, "clip_ratio/low_mean": 0.0007259911490109516, "clip_ratio/low_min": 1.2636473911697976e-05, "clip_ratio/region_mean": 0.0017833317106124014, "epoch": 5.5132691746864975, "grad_norm": 0.15494415163993835, "learning_rate": 1e-06, "loss": -0.0277, "step": 1070 }, { "clip_ratio/high_max": 0.0023478621369577013, "clip_ratio/high_mean": 0.0010715455155150266, "clip_ratio/low_mean": 0.0006710978486808017, "clip_ratio/low_min": 3.34582437062636e-05, "clip_ratio/region_mean": 0.0017426433369109873, "epoch": 5.517935258092739, "grad_norm": 0.14109767973423004, "learning_rate": 1e-06, "loss": -0.037, "step": 1071 }, { "clip_ratio/high_max": 0.0022095215899753384, "clip_ratio/high_mean": 0.0010863233892450808, "clip_ratio/low_mean": 0.0008011010177142452, "clip_ratio/low_min": 3.37837846018374e-05, "clip_ratio/region_mean": 0.0018874244196922518, "epoch": 5.522601341498979, "grad_norm": 0.17250967025756836, "learning_rate": 1e-06, "loss": -0.0699, "step": 1072 }, { "clip_ratio/high_max": 0.005617723465547897, "clip_ratio/high_mean": 0.002320662810234353, "clip_ratio/low_mean": 0.0019079897974734195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004228652687743306, "epoch": 5.5272674249052205, "grad_norm": 0.11128078401088715, "learning_rate": 1e-06, "loss": -0.0433, "step": 1073 }, { "clip_ratio/high_max": 0.00512938011524966, "clip_ratio/high_mean": 0.001960689442057628, "clip_ratio/low_mean": 0.0019387240965897945, "clip_ratio/low_min": 0.00017297096201218665, "clip_ratio/region_mean": 0.0038994134665699676, "epoch": 5.531933508311461, "grad_norm": 0.11491167545318604, "learning_rate": 1e-06, "loss": -0.0213, "step": 1074 }, { "clip_ratio/high_max": 0.0048753045266494155, "clip_ratio/high_mean": 0.0019276366438134573, "clip_ratio/low_mean": 0.0016700985834177118, "clip_ratio/low_min": 0.0001384274655720219, "clip_ratio/region_mean": 0.00359773519448936, "epoch": 5.536599591717702, "grad_norm": 0.12996047735214233, "learning_rate": 1e-06, "loss": -0.0454, "step": 1075 }, { "clip_ratio/high_max": 0.004236992390360683, "clip_ratio/high_mean": 0.0019387972351978533, "clip_ratio/low_mean": 0.001695863469649339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036346606648294255, "epoch": 5.541265675123943, "grad_norm": 0.10617971420288086, "learning_rate": 1e-06, "loss": -0.0346, "step": 1076 }, { "clip_ratio/high_max": 0.005691662343451753, "clip_ratio/high_mean": 0.0020724655187223107, "clip_ratio/low_mean": 0.0018812367343343794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003953702165745199, "epoch": 5.545931758530184, "grad_norm": 0.1152462512254715, "learning_rate": 1e-06, "loss": -0.0389, "step": 1077 }, { "clip_ratio/high_max": 0.004724372047348879, "clip_ratio/high_mean": 0.002063470528810285, "clip_ratio/low_mean": 0.001929070131154731, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00399254058720544, "epoch": 5.550597841936424, "grad_norm": 0.11365631967782974, "learning_rate": 1e-06, "loss": -0.0516, "step": 1078 }, { "clip_ratio/high_max": 0.005498058104421943, "clip_ratio/high_mean": 0.002033653436228633, "clip_ratio/low_mean": 0.002255057930597104, "clip_ratio/low_min": 5.693463754141703e-05, "clip_ratio/region_mean": 0.004288711308618076, "epoch": 5.555263925342666, "grad_norm": 0.11926291882991791, "learning_rate": 1e-06, "loss": -0.02, "step": 1079 }, { "clip_ratio/high_max": 0.0046259300579549745, "clip_ratio/high_mean": 0.0021313317483873107, "clip_ratio/low_mean": 0.001989418040466262, "clip_ratio/low_min": 7.97193861217238e-05, "clip_ratio/region_mean": 0.004120749741559848, "epoch": 5.559930008748906, "grad_norm": 0.1176396906375885, "learning_rate": 1e-06, "loss": -0.0434, "step": 1080 }, { "clip_ratio/high_max": 0.005511932453373447, "clip_ratio/high_mean": 0.002204282194725238, "clip_ratio/low_mean": 0.0021201263734837994, "clip_ratio/low_min": 0.00016949895234574797, "clip_ratio/region_mean": 0.004324408466345631, "epoch": 5.564596092155147, "grad_norm": 0.13098770380020142, "learning_rate": 1e-06, "loss": -0.0269, "step": 1081 }, { "clip_ratio/high_max": 0.004959311118000187, "clip_ratio/high_mean": 0.001934655385412043, "clip_ratio/low_mean": 0.0018828015745384619, "clip_ratio/low_min": 4.3140636989846826e-05, "clip_ratio/region_mean": 0.003817457014520187, "epoch": 5.569262175561388, "grad_norm": 0.11082416027784348, "learning_rate": 1e-06, "loss": -0.0421, "step": 1082 }, { "clip_ratio/high_max": 0.0037324036748032086, "clip_ratio/high_mean": 0.0017099572214647196, "clip_ratio/low_mean": 0.002334678851184435, "clip_ratio/low_min": 5.456360486277845e-05, "clip_ratio/region_mean": 0.004044636138132773, "epoch": 5.573928258967629, "grad_norm": 0.1414852887392044, "learning_rate": 1e-06, "loss": 0.0211, "step": 1083 }, { "clip_ratio/high_max": 0.005095094718853943, "clip_ratio/high_mean": 0.0019395994531805627, "clip_ratio/low_mean": 0.0018185253939009272, "clip_ratio/low_min": 3.838280463241972e-05, "clip_ratio/region_mean": 0.0037581248761853203, "epoch": 5.57859434237387, "grad_norm": 0.1120387390255928, "learning_rate": 1e-06, "loss": -0.0276, "step": 1084 }, { "clip_ratio/high_max": 0.004994406510377303, "clip_ratio/high_mean": 0.002108082568156533, "clip_ratio/low_mean": 0.0021776220492029097, "clip_ratio/low_min": 4.074315438629128e-05, "clip_ratio/region_mean": 0.00428570453368593, "epoch": 5.583260425780111, "grad_norm": 0.18482013046741486, "learning_rate": 1e-06, "loss": -0.0243, "step": 1085 }, { "clip_ratio/high_max": 0.004674766169046052, "clip_ratio/high_mean": 0.0019227692828280851, "clip_ratio/low_mean": 0.001774288590240758, "clip_ratio/low_min": 4.752851600642316e-05, "clip_ratio/region_mean": 0.0036970578075852245, "epoch": 5.587926509186351, "grad_norm": 0.1114470362663269, "learning_rate": 1e-06, "loss": -0.0287, "step": 1086 }, { "clip_ratio/high_max": 0.0048480612895218655, "clip_ratio/high_mean": 0.0020140057094977237, "clip_ratio/low_mean": 0.001531969082861906, "clip_ratio/low_min": 3.806333916145377e-05, "clip_ratio/region_mean": 0.003545974803273566, "epoch": 5.592592592592593, "grad_norm": 0.10625629127025604, "learning_rate": 1e-06, "loss": -0.0378, "step": 1087 }, { "clip_ratio/high_max": 0.0047394736466230825, "clip_ratio/high_mean": 0.002096644733683206, "clip_ratio/low_mean": 0.001747896931192372, "clip_ratio/low_min": 0.00010532051237532869, "clip_ratio/region_mean": 0.0038445415993919596, "epoch": 5.597258675998834, "grad_norm": 0.09825687855482101, "learning_rate": 1e-06, "loss": -0.0707, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0721261160714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 801.2921752929688, "completions/mean_terminated_length": 545.1856689453125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 5.601924759405074, "grad_norm": 0.17363311350345612, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 319663274.0, "reward": 0.66015625, "reward_std": 0.16099147498607635, "rewards/simpleverify_reward/mean": 0.66015625, "rewards/simpleverify_reward/std": 0.473672479391098, "step": 1089 }, { "clip_ratio/high_max": 0.00196332907944452, "clip_ratio/high_mean": 0.0007311119425139623, "clip_ratio/low_mean": 0.0004850490440730937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012161609702161513, "epoch": 5.606590842811316, "grad_norm": 0.16732467710971832, "learning_rate": 1e-06, "loss": -0.0208, "step": 1090 }, { "clip_ratio/high_max": 0.001616110388567904, "clip_ratio/high_mean": 0.0006934280008863425, "clip_ratio/low_mean": 0.0005145723725945572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001208000390761299, "epoch": 5.611256926217556, "grad_norm": 0.1546524167060852, "learning_rate": 1e-06, "loss": -0.0027, "step": 1091 }, { "clip_ratio/high_max": 0.0017909990892803762, "clip_ratio/high_mean": 0.0007520955605286872, "clip_ratio/low_mean": 0.0004986000549251912, "clip_ratio/low_min": 2.0627063349820673e-05, "clip_ratio/region_mean": 0.0012506956118158996, "epoch": 5.615923009623797, "grad_norm": 0.15939919650554657, "learning_rate": 1e-06, "loss": -0.036, "step": 1092 }, { "clip_ratio/high_max": 0.002326019777683541, "clip_ratio/high_mean": 0.0008496173995808931, "clip_ratio/low_mean": 0.0005462897142933798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013959071038698312, "epoch": 5.620589093030038, "grad_norm": 0.34524571895599365, "learning_rate": 1e-06, "loss": -0.0367, "step": 1093 }, { "clip_ratio/high_max": 0.0025907351737259887, "clip_ratio/high_mean": 0.0010017334243457299, "clip_ratio/low_mean": 0.0006092508074289071, "clip_ratio/low_min": 3.235642634535907e-05, "clip_ratio/region_mean": 0.001610984189028386, "epoch": 5.625255176436279, "grad_norm": 0.2278021275997162, "learning_rate": 1e-06, "loss": -0.0663, "step": 1094 }, { "clip_ratio/high_max": 0.0023175524911493994, "clip_ratio/high_mean": 0.0008981495684565743, "clip_ratio/low_mean": 0.0007825216453056782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016806712010293268, "epoch": 5.6299212598425195, "grad_norm": 0.16181299090385437, "learning_rate": 1e-06, "loss": -0.0676, "step": 1095 }, { "clip_ratio/high_max": 0.002330497867660597, "clip_ratio/high_mean": 0.0007984306157595711, "clip_ratio/low_mean": 0.000825373417683295, "clip_ratio/low_min": 4.587155854096636e-05, "clip_ratio/region_mean": 0.001623804015252972, "epoch": 5.634587343248761, "grad_norm": 0.15510323643684387, "learning_rate": 1e-06, "loss": 0.0053, "step": 1096 }, { "clip_ratio/high_max": 0.00197228736942634, "clip_ratio/high_mean": 0.0008431544520135503, "clip_ratio/low_mean": 0.0007782273205521051, "clip_ratio/low_min": 1.766534842317924e-05, "clip_ratio/region_mean": 0.0016213818089454435, "epoch": 5.639253426655001, "grad_norm": 0.1508847326040268, "learning_rate": 1e-06, "loss": -0.0176, "step": 1097 }, { "clip_ratio/high_max": 0.002444359219225589, "clip_ratio/high_mean": 0.0009905890728987288, "clip_ratio/low_mean": 0.0008047451269703743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017953341521206312, "epoch": 5.6439195100612425, "grad_norm": 0.15566974878311157, "learning_rate": 1e-06, "loss": -0.0634, "step": 1098 }, { "clip_ratio/high_max": 0.0026409386919112876, "clip_ratio/high_mean": 0.0010656040212779772, "clip_ratio/low_mean": 0.0008403113897657022, "clip_ratio/low_min": 5.142429654370062e-05, "clip_ratio/region_mean": 0.0019059153928537853, "epoch": 5.648585593467483, "grad_norm": 0.14686118066310883, "learning_rate": 1e-06, "loss": -0.0326, "step": 1099 }, { "clip_ratio/high_max": 0.0022543475060956553, "clip_ratio/high_mean": 0.000969434300714056, "clip_ratio/low_mean": 0.0008012929883989273, "clip_ratio/low_min": 8.432835875282763e-05, "clip_ratio/region_mean": 0.0017707272854750045, "epoch": 5.653251676873724, "grad_norm": 0.1577766388654709, "learning_rate": 1e-06, "loss": -0.0164, "step": 1100 }, { "clip_ratio/high_max": 0.0026114291977137327, "clip_ratio/high_mean": 0.0011314384209981654, "clip_ratio/low_mean": 0.0007933738379506394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019248122625867836, "epoch": 5.657917760279965, "grad_norm": 0.16805461049079895, "learning_rate": 1e-06, "loss": -0.048, "step": 1101 }, { "clip_ratio/high_max": 0.002807301214488689, "clip_ratio/high_mean": 0.0010178441880270839, "clip_ratio/low_mean": 0.0007994123307071277, "clip_ratio/low_min": 3.0259017876232974e-05, "clip_ratio/region_mean": 0.0018172565614804626, "epoch": 5.662583843686206, "grad_norm": 0.16753637790679932, "learning_rate": 1e-06, "loss": -0.0254, "step": 1102 }, { "clip_ratio/high_max": 0.0028521964559331536, "clip_ratio/high_mean": 0.0010553336069278885, "clip_ratio/low_mean": 0.0007586399824504042, "clip_ratio/low_min": 1.26237127915374e-05, "clip_ratio/region_mean": 0.001813973613025155, "epoch": 5.667249927092447, "grad_norm": 0.15705883502960205, "learning_rate": 1e-06, "loss": -0.0512, "step": 1103 }, { "clip_ratio/high_max": 0.0023743039564578794, "clip_ratio/high_mean": 0.0010277639485138934, "clip_ratio/low_mean": 0.0009079600440600188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019357239725650288, "epoch": 5.671916010498688, "grad_norm": 0.3744487762451172, "learning_rate": 1e-06, "loss": -0.0063, "step": 1104 }, { "clip_ratio/high_max": 0.004711620123998728, "clip_ratio/high_mean": 0.0021453926419781055, "clip_ratio/low_mean": 0.0020286341932660434, "clip_ratio/low_min": 5.2936193242203444e-05, "clip_ratio/region_mean": 0.004174026864347979, "epoch": 5.676582093904928, "grad_norm": 0.10706911236047745, "learning_rate": 1e-06, "loss": -0.0264, "step": 1105 }, { "clip_ratio/high_max": 0.004823029492399655, "clip_ratio/high_mean": 0.0018834305519703776, "clip_ratio/low_mean": 0.0019089536344836233, "clip_ratio/low_min": 5.804504326079041e-05, "clip_ratio/region_mean": 0.0037923841300653294, "epoch": 5.681248177311169, "grad_norm": 0.1027434915304184, "learning_rate": 1e-06, "loss": -0.0219, "step": 1106 }, { "clip_ratio/high_max": 0.004103656210645568, "clip_ratio/high_mean": 0.0016622874500171747, "clip_ratio/low_mean": 0.001836797686337377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034990852000191808, "epoch": 5.685914260717411, "grad_norm": 0.10081746429204941, "learning_rate": 1e-06, "loss": -0.0035, "step": 1107 }, { "clip_ratio/high_max": 0.004733275738544762, "clip_ratio/high_mean": 0.0018238598349853419, "clip_ratio/low_mean": 0.0015205755989882164, "clip_ratio/low_min": 9.480412791162962e-05, "clip_ratio/region_mean": 0.0033444353539380245, "epoch": 5.690580344123651, "grad_norm": 0.10527554154396057, "learning_rate": 1e-06, "loss": -0.0369, "step": 1108 }, { "clip_ratio/high_max": 0.004575478262268007, "clip_ratio/high_mean": 0.0018464363274688367, "clip_ratio/low_mean": 0.0017243847651116084, "clip_ratio/low_min": 4.298486965126358e-05, "clip_ratio/region_mean": 0.0035708210634766147, "epoch": 5.695246427529892, "grad_norm": 0.11438775062561035, "learning_rate": 1e-06, "loss": -0.0376, "step": 1109 }, { "clip_ratio/high_max": 0.005584066020674072, "clip_ratio/high_mean": 0.0021269731296342798, "clip_ratio/low_mean": 0.0017782876293495065, "clip_ratio/low_min": 0.0001413922946085222, "clip_ratio/region_mean": 0.0039052607608027756, "epoch": 5.699912510936133, "grad_norm": 0.12058404833078384, "learning_rate": 1e-06, "loss": -0.0673, "step": 1110 }, { "clip_ratio/high_max": 0.005755571139161475, "clip_ratio/high_mean": 0.002112916648911778, "clip_ratio/low_mean": 0.001908920079586096, "clip_ratio/low_min": 6.401502469088882e-05, "clip_ratio/region_mean": 0.004021836764877662, "epoch": 5.704578594342374, "grad_norm": 0.1170424222946167, "learning_rate": 1e-06, "loss": -0.0686, "step": 1111 }, { "clip_ratio/high_max": 0.004587032104609534, "clip_ratio/high_mean": 0.001741022126225289, "clip_ratio/low_mean": 0.0020003209792776033, "clip_ratio/low_min": 7.352940883720294e-05, "clip_ratio/region_mean": 0.003741343149158638, "epoch": 5.7092446777486145, "grad_norm": 0.11023940145969391, "learning_rate": 1e-06, "loss": 0.0045, "step": 1112 }, { "clip_ratio/high_max": 0.004333077071350999, "clip_ratio/high_mean": 0.0018212781506008469, "clip_ratio/low_mean": 0.0019266284434706904, "clip_ratio/low_min": 6.54450268484652e-05, "clip_ratio/region_mean": 0.0037479064922081307, "epoch": 5.713910761154856, "grad_norm": 0.1100669801235199, "learning_rate": 1e-06, "loss": -0.0185, "step": 1113 }, { "clip_ratio/high_max": 0.005343921991880052, "clip_ratio/high_mean": 0.0021120144956512377, "clip_ratio/low_mean": 0.0018746332407317823, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003986647701822221, "epoch": 5.718576844561096, "grad_norm": 0.12329089641571045, "learning_rate": 1e-06, "loss": -0.0643, "step": 1114 }, { "clip_ratio/high_max": 0.005325925361830741, "clip_ratio/high_mean": 0.0019957017175329383, "clip_ratio/low_mean": 0.0018841441451513674, "clip_ratio/low_min": 5.041570693720132e-05, "clip_ratio/region_mean": 0.0038798459281679243, "epoch": 5.723242927967338, "grad_norm": 0.1092992052435875, "learning_rate": 1e-06, "loss": -0.0333, "step": 1115 }, { "clip_ratio/high_max": 0.005351192216039635, "clip_ratio/high_mean": 0.0020601542782969773, "clip_ratio/low_mean": 0.0018666402938833926, "clip_ratio/low_min": 0.00019100204735877924, "clip_ratio/region_mean": 0.0039267945976462215, "epoch": 5.727909011373578, "grad_norm": 0.11630088090896606, "learning_rate": 1e-06, "loss": -0.0174, "step": 1116 }, { "clip_ratio/high_max": 0.0045871324255131185, "clip_ratio/high_mean": 0.0019645351421786472, "clip_ratio/low_mean": 0.0019200983188056853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038846334064146504, "epoch": 5.732575094779819, "grad_norm": 0.11486152559518814, "learning_rate": 1e-06, "loss": -0.0489, "step": 1117 }, { "clip_ratio/high_max": 0.004793749481905252, "clip_ratio/high_mean": 0.0018304229961358942, "clip_ratio/low_mean": 0.001937795370395179, "clip_ratio/low_min": 4.538852590485476e-05, "clip_ratio/region_mean": 0.003768218281038571, "epoch": 5.73724117818606, "grad_norm": 0.10911096632480621, "learning_rate": 1e-06, "loss": -0.0263, "step": 1118 }, { "clip_ratio/high_max": 0.005130968100274913, "clip_ratio/high_mean": 0.0019012518459931016, "clip_ratio/low_mean": 0.001879583091067616, "clip_ratio/low_min": 8.83659886312671e-05, "clip_ratio/region_mean": 0.0037808349152328447, "epoch": 5.741907261592301, "grad_norm": 0.11238105595111847, "learning_rate": 1e-06, "loss": -0.0521, "step": 1119 }, { "clip_ratio/high_max": 0.005286748561047716, "clip_ratio/high_mean": 0.0019838979951600777, "clip_ratio/low_mean": 0.002030055969953537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004013953875983134, "epoch": 5.746573344998541, "grad_norm": 0.11621847748756409, "learning_rate": 1e-06, "loss": -0.0073, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0686383928571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 790.1351318359375, "completions/mean_terminated_length": 546.50341796875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 5.751239428404783, "grad_norm": 0.18430668115615845, "learning_rate": 1e-06, "loss": -0.0391, "num_tokens": 328396203.0, "reward": 0.6566685438156128, "reward_std": 0.15478026866912842, "rewards/simpleverify_reward/mean": 0.6566685438156128, "rewards/simpleverify_reward/std": 0.4748375415802002, "step": 1121 }, { "clip_ratio/high_max": 0.0018891220570367295, "clip_ratio/high_mean": 0.000707031127603841, "clip_ratio/low_mean": 0.00039948274570633657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00110651386603422, "epoch": 5.755905511811024, "grad_norm": 0.15498477220535278, "learning_rate": 1e-06, "loss": -0.0244, "step": 1122 }, { "clip_ratio/high_max": 0.002032228709140327, "clip_ratio/high_mean": 0.0008019923952815589, "clip_ratio/low_mean": 0.000571771780414565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013737642038904596, "epoch": 5.7605715952172645, "grad_norm": 0.1581478714942932, "learning_rate": 1e-06, "loss": -0.0245, "step": 1123 }, { "clip_ratio/high_max": 0.002226620723376982, "clip_ratio/high_mean": 0.0009234493281837786, "clip_ratio/low_mean": 0.0006573238442797447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015807732015673537, "epoch": 5.765237678623506, "grad_norm": 0.17381872236728668, "learning_rate": 1e-06, "loss": -0.0238, "step": 1124 }, { "clip_ratio/high_max": 0.0021960548911010846, "clip_ratio/high_mean": 0.0008710607471584808, "clip_ratio/low_mean": 0.0004864364336754079, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013574971635534894, "epoch": 5.769903762029746, "grad_norm": 0.1541343480348587, "learning_rate": 1e-06, "loss": -0.0236, "step": 1125 }, { "clip_ratio/high_max": 0.002459991505020298, "clip_ratio/high_mean": 0.0010374924004281638, "clip_ratio/low_mean": 0.000506134047100204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015436263965966646, "epoch": 5.7745698454359875, "grad_norm": 0.2002820521593094, "learning_rate": 1e-06, "loss": -0.0382, "step": 1126 }, { "clip_ratio/high_max": 0.0021695071482099593, "clip_ratio/high_mean": 0.0008159552180586616, "clip_ratio/low_mean": 0.0006894877496961271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015054429532028735, "epoch": 5.779235928842228, "grad_norm": 0.1626857966184616, "learning_rate": 1e-06, "loss": 0.0064, "step": 1127 }, { "clip_ratio/high_max": 0.0027781318640336394, "clip_ratio/high_mean": 0.0009724751816975186, "clip_ratio/low_mean": 0.0006995455764808867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016720208077458665, "epoch": 5.783902012248469, "grad_norm": 0.1575443595647812, "learning_rate": 1e-06, "loss": -0.0353, "step": 1128 }, { "clip_ratio/high_max": 0.0024203429420595057, "clip_ratio/high_mean": 0.0009116146902670152, "clip_ratio/low_mean": 0.0007138412656786386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016254559122899082, "epoch": 5.78856809565471, "grad_norm": 0.1576351821422577, "learning_rate": 1e-06, "loss": -0.0088, "step": 1129 }, { "clip_ratio/high_max": 0.002319489431101829, "clip_ratio/high_mean": 0.001001957340122317, "clip_ratio/low_mean": 0.0005997584121359978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016017158013710286, "epoch": 5.793234179060951, "grad_norm": 0.2136499285697937, "learning_rate": 1e-06, "loss": -0.009, "step": 1130 }, { "clip_ratio/high_max": 0.002192228399508167, "clip_ratio/high_mean": 0.0008696250970388064, "clip_ratio/low_mean": 0.0006664772299700417, "clip_ratio/low_min": 1.3631406545755453e-05, "clip_ratio/region_mean": 0.001536102336103795, "epoch": 5.797900262467191, "grad_norm": 0.21299482882022858, "learning_rate": 1e-06, "loss": -0.0376, "step": 1131 }, { "clip_ratio/high_max": 0.0023163586229202338, "clip_ratio/high_mean": 0.000869337822223315, "clip_ratio/low_mean": 0.0008491279440931976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001718465719022788, "epoch": 5.802566345873433, "grad_norm": 0.14687684178352356, "learning_rate": 1e-06, "loss": -0.0274, "step": 1132 }, { "clip_ratio/high_max": 0.002249891826068051, "clip_ratio/high_mean": 0.00098193742451258, "clip_ratio/low_mean": 0.0007998976843737182, "clip_ratio/low_min": 4.605039794114418e-05, "clip_ratio/region_mean": 0.0017818351443565916, "epoch": 5.807232429279673, "grad_norm": 0.15908323228359222, "learning_rate": 1e-06, "loss": -0.0248, "step": 1133 }, { "clip_ratio/high_max": 0.002293161494890228, "clip_ratio/high_mean": 0.0009758439846336842, "clip_ratio/low_mean": 0.0008544246284145629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018302686075912789, "epoch": 5.811898512685914, "grad_norm": 0.14060647785663605, "learning_rate": 1e-06, "loss": -0.0879, "step": 1134 }, { "clip_ratio/high_max": 0.0026730988538474776, "clip_ratio/high_mean": 0.0011894568560819607, "clip_ratio/low_mean": 0.0007574568626296241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019469136677798815, "epoch": 5.816564596092155, "grad_norm": 0.19878049194812775, "learning_rate": 1e-06, "loss": -0.074, "step": 1135 }, { "clip_ratio/high_max": 0.002440112686599605, "clip_ratio/high_mean": 0.0009724094215926016, "clip_ratio/low_mean": 0.0008294244266835449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018018338305409998, "epoch": 5.821230679498396, "grad_norm": 0.15591241419315338, "learning_rate": 1e-06, "loss": 0.0256, "step": 1136 }, { "clip_ratio/high_max": 0.005504639950231649, "clip_ratio/high_mean": 0.0023303269626921974, "clip_ratio/low_mean": 0.001662674556428101, "clip_ratio/low_min": 5.6715063692536205e-05, "clip_ratio/region_mean": 0.0039930015336722136, "epoch": 5.8258967629046365, "grad_norm": 0.10306641459465027, "learning_rate": 1e-06, "loss": -0.0401, "step": 1137 }, { "clip_ratio/high_max": 0.005226370238233358, "clip_ratio/high_mean": 0.0016609989979770035, "clip_ratio/low_mean": 0.0015529863662777643, "clip_ratio/low_min": 3.5734705306822434e-05, "clip_ratio/region_mean": 0.0032139852773980238, "epoch": 5.830562846310878, "grad_norm": 0.10660423338413239, "learning_rate": 1e-06, "loss": -0.0253, "step": 1138 }, { "clip_ratio/high_max": 0.004313852696213871, "clip_ratio/high_mean": 0.0017088600434362888, "clip_ratio/low_mean": 0.0017686615319689736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034775214953697287, "epoch": 5.835228929717118, "grad_norm": 0.11858677864074707, "learning_rate": 1e-06, "loss": -0.0253, "step": 1139 }, { "clip_ratio/high_max": 0.004938046971801668, "clip_ratio/high_mean": 0.001908992780954577, "clip_ratio/low_mean": 0.001893316577479709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003802309380262159, "epoch": 5.83989501312336, "grad_norm": 0.11748365312814713, "learning_rate": 1e-06, "loss": -0.0246, "step": 1140 }, { "clip_ratio/high_max": 0.0049477749853394926, "clip_ratio/high_mean": 0.0018091693309543189, "clip_ratio/low_mean": 0.0016056250087785884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034147944024880417, "epoch": 5.844561096529601, "grad_norm": 0.1080893725156784, "learning_rate": 1e-06, "loss": -0.0244, "step": 1141 }, { "clip_ratio/high_max": 0.004769791499711573, "clip_ratio/high_mean": 0.002059310289041605, "clip_ratio/low_mean": 0.001641244802158326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003700555083923973, "epoch": 5.849227179935841, "grad_norm": 0.11255943030118942, "learning_rate": 1e-06, "loss": -0.0391, "step": 1142 }, { "clip_ratio/high_max": 0.0047813797573326156, "clip_ratio/high_mean": 0.0017254416125069838, "clip_ratio/low_mean": 0.0018506154738133773, "clip_ratio/low_min": 7.246376480907202e-05, "clip_ratio/region_mean": 0.0035760571772698313, "epoch": 5.853893263342083, "grad_norm": 0.11120809614658356, "learning_rate": 1e-06, "loss": 0.0055, "step": 1143 }, { "clip_ratio/high_max": 0.004502664844039828, "clip_ratio/high_mean": 0.0017670512679615058, "clip_ratio/low_mean": 0.0017467857906012796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003513837087666616, "epoch": 5.858559346748323, "grad_norm": 0.11343419551849365, "learning_rate": 1e-06, "loss": -0.0363, "step": 1144 }, { "clip_ratio/high_max": 0.004410411180288065, "clip_ratio/high_mean": 0.0018015401183220092, "clip_ratio/low_mean": 0.0019400491728447378, "clip_ratio/low_min": 8.901093497115653e-05, "clip_ratio/region_mean": 0.0037415893893921748, "epoch": 5.863225430154564, "grad_norm": 0.11715330928564072, "learning_rate": 1e-06, "loss": -0.0097, "step": 1145 }, { "clip_ratio/high_max": 0.004683272010879591, "clip_ratio/high_mean": 0.001918780766573036, "clip_ratio/low_mean": 0.0016194566269405186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003538237448083237, "epoch": 5.867891513560805, "grad_norm": 0.10791654884815216, "learning_rate": 1e-06, "loss": -0.0099, "step": 1146 }, { "clip_ratio/high_max": 0.003957893750339281, "clip_ratio/high_mean": 0.0017250058299396187, "clip_ratio/low_mean": 0.001734820663841674, "clip_ratio/low_min": 4.003843787359074e-05, "clip_ratio/region_mean": 0.0034598266502143815, "epoch": 5.872557596967046, "grad_norm": 0.10622411966323853, "learning_rate": 1e-06, "loss": -0.0384, "step": 1147 }, { "clip_ratio/high_max": 0.004854585014982149, "clip_ratio/high_mean": 0.001847105897468282, "clip_ratio/low_mean": 0.0017949430330190808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036420489923330024, "epoch": 5.8772236803732865, "grad_norm": 0.10256685316562653, "learning_rate": 1e-06, "loss": -0.0283, "step": 1148 }, { "clip_ratio/high_max": 0.004042023836518638, "clip_ratio/high_mean": 0.0018867390535888262, "clip_ratio/low_mean": 0.0019108781634713523, "clip_ratio/low_min": 0.0001165127287094947, "clip_ratio/region_mean": 0.0037976172316120937, "epoch": 5.881889763779528, "grad_norm": 0.11155485361814499, "learning_rate": 1e-06, "loss": -0.0256, "step": 1149 }, { "clip_ratio/high_max": 0.00501972998608835, "clip_ratio/high_mean": 0.002004125024541281, "clip_ratio/low_mean": 0.0017382519363309257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037423769827000797, "epoch": 5.886555847185768, "grad_norm": 0.10853773355484009, "learning_rate": 1e-06, "loss": 0.109, "step": 1150 }, { "clip_ratio/high_max": 0.004434913855220657, "clip_ratio/high_mean": 0.0020659262700064573, "clip_ratio/low_mean": 0.0017229504010174423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037888767546974123, "epoch": 5.8912219305920095, "grad_norm": 0.11831418424844742, "learning_rate": 1e-06, "loss": -0.075, "step": 1151 }, { "clip_ratio/high_max": 0.004260365531081334, "clip_ratio/high_mean": 0.0017086459702113643, "clip_ratio/low_mean": 0.0019094327826678636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003618078728322871, "epoch": 5.89588801399825, "grad_norm": 0.1175091415643692, "learning_rate": 1e-06, "loss": 0.0247, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0709402901785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 796.6719360351562, "completions/mean_terminated_length": 544.7447509765625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.004666083406241, "grad_norm": 0.18242177367210388, "learning_rate": 1e-06, "loss": -0.0283, "num_tokens": 337082867.0, "reward": 0.6605747938156128, "reward_std": 0.15867824852466583, "rewards/simpleverify_reward/mean": 0.6605747938156128, "rewards/simpleverify_reward/std": 0.47353073954582214, "step": 1153 }, { "clip_ratio/high_max": 0.002072514696919825, "clip_ratio/high_mean": 0.0008343372883246047, "clip_ratio/low_mean": 0.0004541730272649147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012885103205917403, "epoch": 6.009332166812482, "grad_norm": 0.17426519095897675, "learning_rate": 1e-06, "loss": -0.0393, "step": 1154 }, { "clip_ratio/high_max": 0.0020436446357052773, "clip_ratio/high_mean": 0.0008077742604655214, "clip_ratio/low_mean": 0.0004613940982380882, "clip_ratio/low_min": 2.2381378585123457e-05, "clip_ratio/region_mean": 0.0012691683587036096, "epoch": 6.013998250218723, "grad_norm": 0.16550327837467194, "learning_rate": 1e-06, "loss": -0.0506, "step": 1155 }, { "clip_ratio/high_max": 0.0022926710626052227, "clip_ratio/high_mean": 0.0008705636064405553, "clip_ratio/low_mean": 0.0005356902420317056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001406253835739335, "epoch": 6.0186643336249634, "grad_norm": 0.17232991755008698, "learning_rate": 1e-06, "loss": -0.0157, "step": 1156 }, { "clip_ratio/high_max": 0.0022572078814846464, "clip_ratio/high_mean": 0.0008629720759927295, "clip_ratio/low_mean": 0.0006348547349261935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014978268227423541, "epoch": 6.023330417031205, "grad_norm": 0.18174593150615692, "learning_rate": 1e-06, "loss": -0.0085, "step": 1157 }, { "clip_ratio/high_max": 0.0021461659562191926, "clip_ratio/high_mean": 0.0009425906382602989, "clip_ratio/low_mean": 0.0006370857663569041, "clip_ratio/low_min": 1.1180679393874016e-05, "clip_ratio/region_mean": 0.0015796764273545705, "epoch": 6.027996500437445, "grad_norm": 0.15307855606079102, "learning_rate": 1e-06, "loss": -0.0491, "step": 1158 }, { "clip_ratio/high_max": 0.0023902650355012156, "clip_ratio/high_mean": 0.0009994628053391352, "clip_ratio/low_mean": 0.0005698486793335178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015693114764872007, "epoch": 6.0326625838436865, "grad_norm": 0.33632391691207886, "learning_rate": 1e-06, "loss": -0.0511, "step": 1159 }, { "clip_ratio/high_max": 0.0023203793170978315, "clip_ratio/high_mean": 0.0008954471250035567, "clip_ratio/low_mean": 0.0005489798259077361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014444269399973564, "epoch": 6.037328667249927, "grad_norm": 0.16559644043445587, "learning_rate": 1e-06, "loss": -0.0009, "step": 1160 }, { "clip_ratio/high_max": 0.002605904774100054, "clip_ratio/high_mean": 0.0010145978849323, "clip_ratio/low_mean": 0.0006279068811636535, "clip_ratio/low_min": 2.8908418244100176e-05, "clip_ratio/region_mean": 0.0016425047942902893, "epoch": 6.041994750656168, "grad_norm": 0.16781704127788544, "learning_rate": 1e-06, "loss": -0.0392, "step": 1161 }, { "clip_ratio/high_max": 0.002401622485194821, "clip_ratio/high_mean": 0.0010046773895737715, "clip_ratio/low_mean": 0.0006829894409747794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016876667868928052, "epoch": 6.046660834062409, "grad_norm": 0.16037827730178833, "learning_rate": 1e-06, "loss": -0.0432, "step": 1162 }, { "clip_ratio/high_max": 0.002678324828593759, "clip_ratio/high_mean": 0.001070161441930395, "clip_ratio/low_mean": 0.0005487863222697342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016189477719308343, "epoch": 6.05132691746865, "grad_norm": 0.33192017674446106, "learning_rate": 1e-06, "loss": -0.0528, "step": 1163 }, { "clip_ratio/high_max": 0.0022977693952270783, "clip_ratio/high_mean": 0.0010240583214908838, "clip_ratio/low_mean": 0.0007470167329302058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017710750544210896, "epoch": 6.05599300087489, "grad_norm": 0.16474823653697968, "learning_rate": 1e-06, "loss": -0.0411, "step": 1164 }, { "clip_ratio/high_max": 0.002620228857267648, "clip_ratio/high_mean": 0.0010338915508327773, "clip_ratio/low_mean": 0.0007665776374778943, "clip_ratio/low_min": 4.7866147724562325e-05, "clip_ratio/region_mean": 0.0018004691773967352, "epoch": 6.060659084281132, "grad_norm": 0.14066080749034882, "learning_rate": 1e-06, "loss": -0.0195, "step": 1165 }, { "clip_ratio/high_max": 0.0023299558597500436, "clip_ratio/high_mean": 0.0009172357749775983, "clip_ratio/low_mean": 0.0007585945604660083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016758303900132887, "epoch": 6.065325167687372, "grad_norm": 19.465309143066406, "learning_rate": 1e-06, "loss": -0.015, "step": 1166 }, { "clip_ratio/high_max": 0.002731207772740163, "clip_ratio/high_mean": 0.001106960484321462, "clip_ratio/low_mean": 0.0008519178991264198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001958878361620009, "epoch": 6.069991251093613, "grad_norm": 0.1689734160900116, "learning_rate": 1e-06, "loss": -0.0215, "step": 1167 }, { "clip_ratio/high_max": 0.003216896264348179, "clip_ratio/high_mean": 0.001274391886909143, "clip_ratio/low_mean": 0.0009468493699387182, "clip_ratio/low_min": 2.4347487851628102e-05, "clip_ratio/region_mean": 0.002221241287770681, "epoch": 6.074657334499854, "grad_norm": 0.17878632247447968, "learning_rate": 1e-06, "loss": -0.0317, "step": 1168 }, { "clip_ratio/high_max": 0.006398129946319386, "clip_ratio/high_mean": 0.0023172747787612025, "clip_ratio/low_mean": 0.001806093407139997, "clip_ratio/low_min": 0.00010767232834041351, "clip_ratio/region_mean": 0.00412336815497838, "epoch": 6.079323417906095, "grad_norm": 0.10401533544063568, "learning_rate": 1e-06, "loss": -0.0291, "step": 1169 }, { "clip_ratio/high_max": 0.005229033122304827, "clip_ratio/high_mean": 0.0019465270415821578, "clip_ratio/low_mean": 0.001863125438831048, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003809652422205545, "epoch": 6.083989501312336, "grad_norm": 0.11464226990938187, "learning_rate": 1e-06, "loss": -0.0403, "step": 1170 }, { "clip_ratio/high_max": 0.004947922789142467, "clip_ratio/high_mean": 0.0019519247216521762, "clip_ratio/low_mean": 0.001802089642296778, "clip_ratio/low_min": 6.003842281643301e-05, "clip_ratio/region_mean": 0.0037540144112426788, "epoch": 6.088655584718577, "grad_norm": 0.11119744181632996, "learning_rate": 1e-06, "loss": -0.0514, "step": 1171 }, { "clip_ratio/high_max": 0.004886896247626282, "clip_ratio/high_mean": 0.0020213665266055614, "clip_ratio/low_mean": 0.0019833549740724266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004004721544333734, "epoch": 6.093321668124818, "grad_norm": 0.11953351646661758, "learning_rate": 1e-06, "loss": -0.0167, "step": 1172 }, { "clip_ratio/high_max": 0.005028811312513426, "clip_ratio/high_mean": 0.0019269078438810539, "clip_ratio/low_mean": 0.0020151378375885542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003942045572330244, "epoch": 6.0979877515310585, "grad_norm": 0.11792458593845367, "learning_rate": 1e-06, "loss": -0.0095, "step": 1173 }, { "clip_ratio/high_max": 0.005057659742305987, "clip_ratio/high_mean": 0.0020642356648750138, "clip_ratio/low_mean": 0.0017972054811252747, "clip_ratio/low_min": 2.466131809342187e-05, "clip_ratio/region_mean": 0.0038614411605522037, "epoch": 6.1026538349373, "grad_norm": 0.11919765174388885, "learning_rate": 1e-06, "loss": -0.0499, "step": 1174 }, { "clip_ratio/high_max": 0.005015512273530476, "clip_ratio/high_mean": 0.0019917262616218068, "clip_ratio/low_mean": 0.0016201725011342205, "clip_ratio/low_min": 3.2929398003034294e-05, "clip_ratio/region_mean": 0.0036118987191002816, "epoch": 6.10731991834354, "grad_norm": 0.10362087935209274, "learning_rate": 1e-06, "loss": -0.0518, "step": 1175 }, { "clip_ratio/high_max": 0.005355516172130592, "clip_ratio/high_mean": 0.0019264529473730363, "clip_ratio/low_mean": 0.0016400171480199788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035664700117195025, "epoch": 6.111986001749782, "grad_norm": 0.10846006870269775, "learning_rate": 1e-06, "loss": -0.0019, "step": 1176 }, { "clip_ratio/high_max": 0.005369786260416731, "clip_ratio/high_mean": 0.001958933575224364, "clip_ratio/low_mean": 0.0017523655042168684, "clip_ratio/low_min": 5.529749978450127e-05, "clip_ratio/region_mean": 0.0037112990394234657, "epoch": 6.116652085156022, "grad_norm": 0.20648466050624847, "learning_rate": 1e-06, "loss": -0.04, "step": 1177 }, { "clip_ratio/high_max": 0.0046888306678738445, "clip_ratio/high_mean": 0.0020841070472670253, "clip_ratio/low_mean": 0.00181154056917876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003895647605531849, "epoch": 6.121318168562263, "grad_norm": 0.11252682656049728, "learning_rate": 1e-06, "loss": -0.044, "step": 1178 }, { "clip_ratio/high_max": 0.00471595144335879, "clip_ratio/high_mean": 0.0020018911563965958, "clip_ratio/low_mean": 0.0016060811203715275, "clip_ratio/low_min": 3.790942355408333e-05, "clip_ratio/region_mean": 0.0036079723286093213, "epoch": 6.125984251968504, "grad_norm": 0.17672204971313477, "learning_rate": 1e-06, "loss": -0.0536, "step": 1179 }, { "clip_ratio/high_max": 0.0044734130933647975, "clip_ratio/high_mean": 0.0020178506383672357, "clip_ratio/low_mean": 0.0019025384244741872, "clip_ratio/low_min": 2.3165308448369615e-05, "clip_ratio/region_mean": 0.003920389121049084, "epoch": 6.130650335374745, "grad_norm": 0.329829603433609, "learning_rate": 1e-06, "loss": -0.042, "step": 1180 }, { "clip_ratio/high_max": 0.004576621045998763, "clip_ratio/high_mean": 0.0018703969071793836, "clip_ratio/low_mean": 0.0017712166081764735, "clip_ratio/low_min": 7.933452070574276e-05, "clip_ratio/region_mean": 0.0036416134535102174, "epoch": 6.135316418780985, "grad_norm": 0.10979607701301575, "learning_rate": 1e-06, "loss": -0.0203, "step": 1181 }, { "clip_ratio/high_max": 0.002820237939886283, "clip_ratio/high_mean": 0.001266337185370503, "clip_ratio/low_mean": 0.001247524796781363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025138619530480355, "epoch": 6.139982502187227, "grad_norm": 0.16531234979629517, "learning_rate": 1e-06, "loss": -0.0028, "step": 1182 }, { "clip_ratio/high_max": 0.00467645660683047, "clip_ratio/high_mean": 0.0019299036539450753, "clip_ratio/low_mean": 0.0019747214591916418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003904625147697516, "epoch": 6.144648585593467, "grad_norm": 0.11468078941106796, "learning_rate": 1e-06, "loss": -0.0224, "step": 1183 }, { "clip_ratio/high_max": 0.00507454094622517, "clip_ratio/high_mean": 0.002216409702668898, "clip_ratio/low_mean": 0.0019296281898277812, "clip_ratio/low_min": 0.00010956369078485295, "clip_ratio/region_mean": 0.004146037841564976, "epoch": 6.1493146689997085, "grad_norm": 0.12081310153007507, "learning_rate": 1e-06, "loss": -0.0325, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0782645089285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 818.815673828125, "completions/mean_terminated_length": 540.5501098632812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 6.153980752405949, "grad_norm": 0.18249818682670593, "learning_rate": 1e-06, "loss": -0.051, "num_tokens": 345663720.0, "reward": 0.6590402126312256, "reward_std": 0.1498425006866455, "rewards/simpleverify_reward/mean": 0.6590401530265808, "rewards/simpleverify_reward/std": 0.4740484058856964, "step": 1185 }, { "clip_ratio/high_max": 0.0019249927281634882, "clip_ratio/high_mean": 0.0007629003885085694, "clip_ratio/low_mean": 0.0005314342215569923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012943346118845511, "epoch": 6.15864683581219, "grad_norm": 0.14904159307479858, "learning_rate": 1e-06, "loss": -0.0101, "step": 1186 }, { "clip_ratio/high_max": 0.002117677671776619, "clip_ratio/high_mean": 0.0007956616900628433, "clip_ratio/low_mean": 0.0005051299444858159, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013007916350034066, "epoch": 6.163312919218431, "grad_norm": 0.18026763200759888, "learning_rate": 1e-06, "loss": -0.0017, "step": 1187 }, { "clip_ratio/high_max": 0.001876542019090266, "clip_ratio/high_mean": 0.0006761473482583824, "clip_ratio/low_mean": 0.00043008777993236436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011062351295549888, "epoch": 6.167979002624672, "grad_norm": 0.15997542440891266, "learning_rate": 1e-06, "loss": -0.0118, "step": 1188 }, { "clip_ratio/high_max": 0.002008318668231368, "clip_ratio/high_mean": 0.0009143218667304609, "clip_ratio/low_mean": 0.00048609018358547473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014004120457684621, "epoch": 6.172645086030913, "grad_norm": 0.17452406883239746, "learning_rate": 1e-06, "loss": -0.0824, "step": 1189 }, { "clip_ratio/high_max": 0.001978471605980303, "clip_ratio/high_mean": 0.0008168477779690875, "clip_ratio/low_mean": 0.0006283910734055098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00144523890412529, "epoch": 6.177311169437154, "grad_norm": 0.2948774993419647, "learning_rate": 1e-06, "loss": -0.0065, "step": 1190 }, { "clip_ratio/high_max": 0.0021932979361736216, "clip_ratio/high_mean": 0.0009638114697736455, "clip_ratio/low_mean": 0.0006231919232959626, "clip_ratio/low_min": 1.5723269825684838e-05, "clip_ratio/region_mean": 0.0015870034258114174, "epoch": 6.181977252843395, "grad_norm": 0.16570262610912323, "learning_rate": 1e-06, "loss": -0.035, "step": 1191 }, { "clip_ratio/high_max": 0.002613428106997162, "clip_ratio/high_mean": 0.000963521282756119, "clip_ratio/low_mean": 0.0005710237746825442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001534545106551377, "epoch": 6.186643336249635, "grad_norm": 0.15736079216003418, "learning_rate": 1e-06, "loss": -0.0483, "step": 1192 }, { "clip_ratio/high_max": 0.0024491959520673845, "clip_ratio/high_mean": 0.0009756617546372581, "clip_ratio/low_mean": 0.0007420453548547812, "clip_ratio/low_min": 5.886508006369695e-05, "clip_ratio/region_mean": 0.0017177070694742724, "epoch": 6.191309419655877, "grad_norm": 0.34508633613586426, "learning_rate": 1e-06, "loss": -0.0271, "step": 1193 }, { "clip_ratio/high_max": 0.0022464419234893285, "clip_ratio/high_mean": 0.0009135547807090916, "clip_ratio/low_mean": 0.0005562316064242623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001469786380766891, "epoch": 6.195975503062117, "grad_norm": 0.1551639586687088, "learning_rate": 1e-06, "loss": -0.0163, "step": 1194 }, { "clip_ratio/high_max": 0.002401197954895906, "clip_ratio/high_mean": 0.0009552566370985005, "clip_ratio/low_mean": 0.0008104696398731903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017657262942520902, "epoch": 6.200641586468358, "grad_norm": 0.16015034914016724, "learning_rate": 1e-06, "loss": -0.0157, "step": 1195 }, { "clip_ratio/high_max": 0.0027758564829127863, "clip_ratio/high_mean": 0.0010753981696325354, "clip_ratio/low_mean": 0.0007095154569469742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017849136056611314, "epoch": 6.205307669874599, "grad_norm": 0.13522781431674957, "learning_rate": 1e-06, "loss": -0.0569, "step": 1196 }, { "clip_ratio/high_max": 0.002522499817132484, "clip_ratio/high_mean": 0.0010829261827893788, "clip_ratio/low_mean": 0.0006819726731919218, "clip_ratio/low_min": 1.3718174159293994e-05, "clip_ratio/region_mean": 0.0017648988723522052, "epoch": 6.20997375328084, "grad_norm": 0.21462500095367432, "learning_rate": 1e-06, "loss": -0.05, "step": 1197 }, { "clip_ratio/high_max": 0.0022522393410326913, "clip_ratio/high_mean": 0.0010421756705909502, "clip_ratio/low_mean": 0.000800628297838557, "clip_ratio/low_min": 1.6212710761465132e-05, "clip_ratio/region_mean": 0.0018428039984428324, "epoch": 6.2146398366870805, "grad_norm": 0.14750517904758453, "learning_rate": 1e-06, "loss": -0.0119, "step": 1198 }, { "clip_ratio/high_max": 0.0026642066295607947, "clip_ratio/high_mean": 0.001035273053275887, "clip_ratio/low_mean": 0.0007639464683961705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017992195062106475, "epoch": 6.219305920093322, "grad_norm": 0.16001133620738983, "learning_rate": 1e-06, "loss": -0.0313, "step": 1199 }, { "clip_ratio/high_max": 0.002767722893622704, "clip_ratio/high_mean": 0.0010996754263032926, "clip_ratio/low_mean": 0.0007883840153226629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018880594252550509, "epoch": 6.223972003499562, "grad_norm": 0.16357660293579102, "learning_rate": 1e-06, "loss": -0.0246, "step": 1200 }, { "clip_ratio/high_max": 0.006018579952069558, "clip_ratio/high_mean": 0.0023861274639784824, "clip_ratio/low_mean": 0.001599101200554287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003985228526289575, "epoch": 6.228638086905804, "grad_norm": 0.10670291632413864, "learning_rate": 1e-06, "loss": -0.052, "step": 1201 }, { "clip_ratio/high_max": 0.004198655595246237, "clip_ratio/high_mean": 0.001708283551124623, "clip_ratio/low_mean": 0.0017101457924582064, "clip_ratio/low_min": 7.571168680442497e-05, "clip_ratio/region_mean": 0.003418429318116978, "epoch": 6.233304170312044, "grad_norm": 0.11198136955499649, "learning_rate": 1e-06, "loss": -0.011, "step": 1202 }, { "clip_ratio/high_max": 0.005343893571989611, "clip_ratio/high_mean": 0.0018912964369519614, "clip_ratio/low_mean": 0.0018836276194633683, "clip_ratio/low_min": 8.408155190409161e-05, "clip_ratio/region_mean": 0.0037749240873381495, "epoch": 6.237970253718285, "grad_norm": 0.12194002419710159, "learning_rate": 1e-06, "loss": -0.0028, "step": 1203 }, { "clip_ratio/high_max": 0.004177896309556672, "clip_ratio/high_mean": 0.0015575670386169804, "clip_ratio/low_mean": 0.001511940914497245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030695079622091725, "epoch": 6.242636337124526, "grad_norm": 0.11400716006755829, "learning_rate": 1e-06, "loss": -0.0126, "step": 1204 }, { "clip_ratio/high_max": 0.004740503485663794, "clip_ratio/high_mean": 0.002137157742254203, "clip_ratio/low_mean": 0.0014760709636902902, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036132286622887477, "epoch": 6.247302420530767, "grad_norm": 0.1198422759771347, "learning_rate": 1e-06, "loss": -0.0833, "step": 1205 }, { "clip_ratio/high_max": 0.0046564669464714825, "clip_ratio/high_mean": 0.0018952506725327112, "clip_ratio/low_mean": 0.0019569300529838074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003852180772810243, "epoch": 6.251968503937007, "grad_norm": 0.11283595114946365, "learning_rate": 1e-06, "loss": -0.0075, "step": 1206 }, { "clip_ratio/high_max": 0.0046794385416433215, "clip_ratio/high_mean": 0.002063173582428135, "clip_ratio/low_mean": 0.0018245583032694412, "clip_ratio/low_min": 0.00010994718832080252, "clip_ratio/region_mean": 0.003887731916620396, "epoch": 6.256634587343249, "grad_norm": 0.11396816372871399, "learning_rate": 1e-06, "loss": -0.0359, "step": 1207 }, { "clip_ratio/high_max": 0.005474982172017917, "clip_ratio/high_mean": 0.0019591440504882485, "clip_ratio/low_mean": 0.001593008233612636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035521522513590753, "epoch": 6.26130067074949, "grad_norm": 0.10996533930301666, "learning_rate": 1e-06, "loss": -0.0491, "step": 1208 }, { "clip_ratio/high_max": 0.004971664020558819, "clip_ratio/high_mean": 0.0018457627957104705, "clip_ratio/low_mean": 0.0020515508913376834, "clip_ratio/low_min": 0.00012034660176141188, "clip_ratio/region_mean": 0.003897313741617836, "epoch": 6.2659667541557305, "grad_norm": 0.10976897925138474, "learning_rate": 1e-06, "loss": -0.0281, "step": 1209 }, { "clip_ratio/high_max": 0.003971547328546876, "clip_ratio/high_mean": 0.001673905384450336, "clip_ratio/low_mean": 0.0016067895248852437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003280694865679834, "epoch": 6.270632837561972, "grad_norm": 0.09842522442340851, "learning_rate": 1e-06, "loss": -0.0171, "step": 1210 }, { "clip_ratio/high_max": 0.0043607683328446, "clip_ratio/high_mean": 0.0018856956739909947, "clip_ratio/low_mean": 0.0018964723094541114, "clip_ratio/low_min": 6.259389192564413e-05, "clip_ratio/region_mean": 0.003782167928875424, "epoch": 6.275298920968212, "grad_norm": 0.11316193640232086, "learning_rate": 1e-06, "loss": -0.0165, "step": 1211 }, { "clip_ratio/high_max": 0.0042263482173439115, "clip_ratio/high_mean": 0.0018156182231905404, "clip_ratio/low_mean": 0.0015529227275692392, "clip_ratio/low_min": 1.8729397197603248e-05, "clip_ratio/region_mean": 0.003368540928931907, "epoch": 6.2799650043744535, "grad_norm": 0.09869983792304993, "learning_rate": 1e-06, "loss": -0.0576, "step": 1212 }, { "clip_ratio/high_max": 0.004855348859564401, "clip_ratio/high_mean": 0.002084437852317933, "clip_ratio/low_mean": 0.001737388603942236, "clip_ratio/low_min": 1.3718174159293994e-05, "clip_ratio/region_mean": 0.003821826510829851, "epoch": 6.284631087780694, "grad_norm": 0.11986733227968216, "learning_rate": 1e-06, "loss": -0.0511, "step": 1213 }, { "clip_ratio/high_max": 0.004412332120409701, "clip_ratio/high_mean": 0.0018371955775364768, "clip_ratio/low_mean": 0.0018056739954772638, "clip_ratio/low_min": 0.00014779370030737482, "clip_ratio/region_mean": 0.0036428695602808148, "epoch": 6.289297171186935, "grad_norm": 0.10239291936159134, "learning_rate": 1e-06, "loss": -0.0127, "step": 1214 }, { "clip_ratio/high_max": 0.004548331198748201, "clip_ratio/high_mean": 0.0017318788741249591, "clip_ratio/low_mean": 0.0016407292569056153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003372608101926744, "epoch": 6.293963254593176, "grad_norm": 0.10203015804290771, "learning_rate": 1e-06, "loss": -0.0321, "step": 1215 }, { "clip_ratio/high_max": 0.005308132007485256, "clip_ratio/high_mean": 0.001979669748834567, "clip_ratio/low_mean": 0.001633669660805026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036133393805357628, "epoch": 6.298629337999417, "grad_norm": 0.10270557552576065, "learning_rate": 1e-06, "loss": -0.0255, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0863560267857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 852.9284057617188, "completions/mean_terminated_length": 546.39892578125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 6.303295421405657, "grad_norm": 0.1704927235841751, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 354247965.0, "reward": 0.6564592719078064, "reward_std": 0.16587857902050018, "rewards/simpleverify_reward/mean": 0.6564592719078064, "rewards/simpleverify_reward/std": 0.4749065339565277, "step": 1217 }, { "clip_ratio/high_max": 0.002146811945749505, "clip_ratio/high_mean": 0.0008467401848974987, "clip_ratio/low_mean": 0.0005049297005825792, "clip_ratio/low_min": 1.367315690004034e-05, "clip_ratio/region_mean": 0.0013516699109459296, "epoch": 6.307961504811899, "grad_norm": 0.15601028501987457, "learning_rate": 1e-06, "loss": -0.0368, "step": 1218 }, { "clip_ratio/high_max": 0.001707838546280982, "clip_ratio/high_mean": 0.0007874560560594546, "clip_ratio/low_mean": 0.0005103927223899518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001297848779358901, "epoch": 6.312627588218139, "grad_norm": 0.1513843685388565, "learning_rate": 1e-06, "loss": -0.0558, "step": 1219 }, { "clip_ratio/high_max": 0.002039637894995394, "clip_ratio/high_mean": 0.0008051618733588839, "clip_ratio/low_mean": 0.0006329370862658834, "clip_ratio/low_min": 1.1438506589911412e-05, "clip_ratio/region_mean": 0.0014380989778146613, "epoch": 6.31729367162438, "grad_norm": 0.18281784653663635, "learning_rate": 1e-06, "loss": -0.0055, "step": 1220 }, { "clip_ratio/high_max": 0.002099801327858586, "clip_ratio/high_mean": 0.0007978338417160558, "clip_ratio/low_mean": 0.0006451449980886537, "clip_ratio/low_min": 3.295685201010201e-05, "clip_ratio/region_mean": 0.001442978831619257, "epoch": 6.321959755030621, "grad_norm": 0.19137556850910187, "learning_rate": 1e-06, "loss": 0.0367, "step": 1221 }, { "clip_ratio/high_max": 0.0022526787251990754, "clip_ratio/high_mean": 0.0009003073064377531, "clip_ratio/low_mean": 0.0004778951874868653, "clip_ratio/low_min": 1.9672646885737777e-05, "clip_ratio/region_mean": 0.0013782024871034082, "epoch": 6.326625838436862, "grad_norm": 0.1912624090909958, "learning_rate": 1e-06, "loss": -0.0486, "step": 1222 }, { "clip_ratio/high_max": 0.0025964861852116883, "clip_ratio/high_mean": 0.0010127169734914787, "clip_ratio/low_mean": 0.0006189005343912868, "clip_ratio/low_min": 2.3710166715318337e-05, "clip_ratio/region_mean": 0.0016316174805979244, "epoch": 6.331291921843103, "grad_norm": 0.17751407623291016, "learning_rate": 1e-06, "loss": -0.0433, "step": 1223 }, { "clip_ratio/high_max": 0.0022765305984648876, "clip_ratio/high_mean": 0.0008940868356148712, "clip_ratio/low_mean": 0.0007412671930069337, "clip_ratio/low_min": 2.9157919925637543e-05, "clip_ratio/region_mean": 0.0016353540486306883, "epoch": 6.335958005249344, "grad_norm": 0.17658056318759918, "learning_rate": 1e-06, "loss": -0.0183, "step": 1224 }, { "clip_ratio/high_max": 0.002015412970649777, "clip_ratio/high_mean": 0.0008641426793474238, "clip_ratio/low_mean": 0.0006890414470035466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015531841272604652, "epoch": 6.340624088655585, "grad_norm": 0.17431703209877014, "learning_rate": 1e-06, "loss": -0.0496, "step": 1225 }, { "clip_ratio/high_max": 0.0024399706453550607, "clip_ratio/high_mean": 0.0009905127262754831, "clip_ratio/low_mean": 0.0006987517472225591, "clip_ratio/low_min": 6.957571713428479e-05, "clip_ratio/region_mean": 0.0016892644816834945, "epoch": 6.3452901720618256, "grad_norm": 0.14510475099086761, "learning_rate": 1e-06, "loss": -0.0308, "step": 1226 }, { "clip_ratio/high_max": 0.002814829713315703, "clip_ratio/high_mean": 0.0012531741922430228, "clip_ratio/low_mean": 0.0008226851459767204, "clip_ratio/low_min": 1.344086012977641e-05, "clip_ratio/region_mean": 0.002075859301839955, "epoch": 6.349956255468067, "grad_norm": 0.17179343104362488, "learning_rate": 1e-06, "loss": -0.0631, "step": 1227 }, { "clip_ratio/high_max": 0.002603218672447838, "clip_ratio/high_mean": 0.0010548194804869127, "clip_ratio/low_mean": 0.0009420140968359192, "clip_ratio/low_min": 3.553155329427682e-05, "clip_ratio/region_mean": 0.001996833576413337, "epoch": 6.354622338874307, "grad_norm": 0.14865462481975555, "learning_rate": 1e-06, "loss": -0.0198, "step": 1228 }, { "clip_ratio/high_max": 0.0028972109430469573, "clip_ratio/high_mean": 0.0013142562929715496, "clip_ratio/low_mean": 0.000696472498930234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002010728800087236, "epoch": 6.359288422280549, "grad_norm": 0.1715545803308487, "learning_rate": 1e-06, "loss": -0.0856, "step": 1229 }, { "clip_ratio/high_max": 0.0021622092863253783, "clip_ratio/high_mean": 0.0009393882737640524, "clip_ratio/low_mean": 0.0009024466435221257, "clip_ratio/low_min": 0.00012624126884475118, "clip_ratio/region_mean": 0.0018418349136481993, "epoch": 6.363954505686789, "grad_norm": 0.14434809982776642, "learning_rate": 1e-06, "loss": -0.0654, "step": 1230 }, { "clip_ratio/high_max": 0.002288113973918371, "clip_ratio/high_mean": 0.0010011587855842663, "clip_ratio/low_mean": 0.0009305912826675922, "clip_ratio/low_min": 3.062224277528003e-05, "clip_ratio/region_mean": 0.0019317500773468055, "epoch": 6.36862058909303, "grad_norm": 0.18370015919208527, "learning_rate": 1e-06, "loss": -0.0274, "step": 1231 }, { "clip_ratio/high_max": 0.0027717963748727925, "clip_ratio/high_mean": 0.001122956851759227, "clip_ratio/low_mean": 0.0008680083974468289, "clip_ratio/low_min": 3.7004145269747823e-05, "clip_ratio/region_mean": 0.0019909652255591936, "epoch": 6.373286672499271, "grad_norm": 0.16649724543094635, "learning_rate": 1e-06, "loss": -0.0261, "step": 1232 }, { "clip_ratio/high_max": 0.004594765974616166, "clip_ratio/high_mean": 0.0019482305069686845, "clip_ratio/low_mean": 0.0017197363849845715, "clip_ratio/low_min": 7.365940109593794e-05, "clip_ratio/region_mean": 0.003667966928333044, "epoch": 6.377952755905512, "grad_norm": 0.09771525859832764, "learning_rate": 1e-06, "loss": -0.0403, "step": 1233 }, { "clip_ratio/high_max": 0.004898667881207075, "clip_ratio/high_mean": 0.0018849668031180045, "clip_ratio/low_mean": 0.0018895881294156425, "clip_ratio/low_min": 0.0001461853680666536, "clip_ratio/region_mean": 0.003774554963456467, "epoch": 6.3826188393117524, "grad_norm": 0.11019729822874069, "learning_rate": 1e-06, "loss": -0.0376, "step": 1234 }, { "clip_ratio/high_max": 0.0046329854521900415, "clip_ratio/high_mean": 0.001956934458576143, "clip_ratio/low_mean": 0.001620308859855868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003577243347535841, "epoch": 6.387284922717994, "grad_norm": 0.11509384214878082, "learning_rate": 1e-06, "loss": -0.0565, "step": 1235 }, { "clip_ratio/high_max": 0.005114857107400894, "clip_ratio/high_mean": 0.0019909675138478633, "clip_ratio/low_mean": 0.002072450406558346, "clip_ratio/low_min": 0.00013840092287864536, "clip_ratio/region_mean": 0.004063417931320146, "epoch": 6.391951006124234, "grad_norm": 0.11662361770868301, "learning_rate": 1e-06, "loss": -0.0065, "step": 1236 }, { "clip_ratio/high_max": 0.004130422894377261, "clip_ratio/high_mean": 0.0016557166736674844, "clip_ratio/low_mean": 0.0022906041849637404, "clip_ratio/low_min": 0.00019724996309378184, "clip_ratio/region_mean": 0.003946320852264762, "epoch": 6.3966170895304755, "grad_norm": 0.11197635531425476, "learning_rate": 1e-06, "loss": 0.0357, "step": 1237 }, { "clip_ratio/high_max": 0.005132495876750909, "clip_ratio/high_mean": 0.0020246076237526722, "clip_ratio/low_mean": 0.001689822347543668, "clip_ratio/low_min": 0.00017310166731476784, "clip_ratio/region_mean": 0.003714430014952086, "epoch": 6.401283172936716, "grad_norm": 0.11977191269397736, "learning_rate": 1e-06, "loss": -0.0497, "step": 1238 }, { "clip_ratio/high_max": 0.005149161588633433, "clip_ratio/high_mean": 0.0020841576406382956, "clip_ratio/low_mean": 0.0017784126466722228, "clip_ratio/low_min": 1.5078408068802673e-05, "clip_ratio/region_mean": 0.003862570272758603, "epoch": 6.405949256342957, "grad_norm": 0.12726153433322906, "learning_rate": 1e-06, "loss": -0.0443, "step": 1239 }, { "clip_ratio/high_max": 0.005396588516305201, "clip_ratio/high_mean": 0.00198792675655568, "clip_ratio/low_mean": 0.0019857112056342885, "clip_ratio/low_min": 0.00023187206261354731, "clip_ratio/region_mean": 0.003973637925810181, "epoch": 6.410615339749198, "grad_norm": 0.11807332187891006, "learning_rate": 1e-06, "loss": -0.0193, "step": 1240 }, { "clip_ratio/high_max": 0.005263083818135783, "clip_ratio/high_mean": 0.001937594061018899, "clip_ratio/low_mean": 0.001664674262428889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003602268305257894, "epoch": 6.415281423155439, "grad_norm": 0.10560297220945358, "learning_rate": 1e-06, "loss": -0.0504, "step": 1241 }, { "clip_ratio/high_max": 0.005251878552371636, "clip_ratio/high_mean": 0.0019670318288262933, "clip_ratio/low_mean": 0.00175074038997991, "clip_ratio/low_min": 0.0002072963397949934, "clip_ratio/region_mean": 0.0037177721678745, "epoch": 6.41994750656168, "grad_norm": 0.10284769535064697, "learning_rate": 1e-06, "loss": -0.0315, "step": 1242 }, { "clip_ratio/high_max": 0.005487411763169803, "clip_ratio/high_mean": 0.002174908047891222, "clip_ratio/low_mean": 0.0018319984519621357, "clip_ratio/low_min": 9.408601908944547e-05, "clip_ratio/region_mean": 0.004006906572612934, "epoch": 6.424613589967921, "grad_norm": 0.12225215137004852, "learning_rate": 1e-06, "loss": -0.0641, "step": 1243 }, { "clip_ratio/high_max": 0.00431483234569896, "clip_ratio/high_mean": 0.0018213755975011736, "clip_ratio/low_mean": 0.0020509240312094335, "clip_ratio/low_min": 0.00010236504022032022, "clip_ratio/region_mean": 0.0038722997705917805, "epoch": 6.429279673374162, "grad_norm": 0.1257714480161667, "learning_rate": 1e-06, "loss": -0.0206, "step": 1244 }, { "clip_ratio/high_max": 0.006187643710291013, "clip_ratio/high_mean": 0.002480797928001266, "clip_ratio/low_mean": 0.0016642445298202801, "clip_ratio/low_min": 7.527852721977979e-05, "clip_ratio/region_mean": 0.004145042417803779, "epoch": 6.433945756780402, "grad_norm": 0.13330836594104767, "learning_rate": 1e-06, "loss": -0.0866, "step": 1245 }, { "clip_ratio/high_max": 0.004366725261206739, "clip_ratio/high_mean": 0.0018188512294727843, "clip_ratio/low_mean": 0.0018376738680672133, "clip_ratio/low_min": 0.0002837841821019538, "clip_ratio/region_mean": 0.0036565251066349447, "epoch": 6.438611840186644, "grad_norm": 0.10906616598367691, "learning_rate": 1e-06, "loss": -0.0661, "step": 1246 }, { "clip_ratio/high_max": 0.0054893064516363665, "clip_ratio/high_mean": 0.002120609839039389, "clip_ratio/low_mean": 0.0023535639520559926, "clip_ratio/low_min": 9.18667356017977e-05, "clip_ratio/region_mean": 0.00447417379473336, "epoch": 6.443277923592884, "grad_norm": 0.1393863707780838, "learning_rate": 1e-06, "loss": -0.0286, "step": 1247 }, { "clip_ratio/high_max": 0.004559741108096205, "clip_ratio/high_mean": 0.0020982206879125442, "clip_ratio/low_mean": 0.0020661255875893403, "clip_ratio/low_min": 7.400829053949565e-05, "clip_ratio/region_mean": 0.0041643462900538, "epoch": 6.447944006999125, "grad_norm": 0.11689954251050949, "learning_rate": 1e-06, "loss": -0.0271, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0832868303571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 832.9703369140625, "completions/mean_terminated_length": 536.5117797851562, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 6.452610090405366, "grad_norm": 0.18437311053276062, "learning_rate": 1e-06, "loss": -0.0279, "num_tokens": 362704363.0, "reward": 0.6664341688156128, "reward_std": 0.1563601940870285, "rewards/simpleverify_reward/mean": 0.6664341688156128, "rewards/simpleverify_reward/std": 0.4715031087398529, "step": 1249 }, { "clip_ratio/high_max": 0.0020737703671329655, "clip_ratio/high_mean": 0.0007545195212514955, "clip_ratio/low_mean": 0.0005650765860991669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013195961037126835, "epoch": 6.457276173811607, "grad_norm": 0.18921905755996704, "learning_rate": 1e-06, "loss": -0.0131, "step": 1250 }, { "clip_ratio/high_max": 0.002053243668342475, "clip_ratio/high_mean": 0.0007782194943501963, "clip_ratio/low_mean": 0.0004931878283969127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012714073091046885, "epoch": 6.4619422572178475, "grad_norm": 0.15873084962368011, "learning_rate": 1e-06, "loss": -0.0096, "step": 1251 }, { "clip_ratio/high_max": 0.0022888294042786583, "clip_ratio/high_mean": 0.0008206631173379719, "clip_ratio/low_mean": 0.00042945601444444037, "clip_ratio/low_min": 2.7268761186860502e-05, "clip_ratio/region_mean": 0.0012501191413321067, "epoch": 6.466608340624089, "grad_norm": 0.26511502265930176, "learning_rate": 1e-06, "loss": -0.0045, "step": 1252 }, { "clip_ratio/high_max": 0.002409052576695103, "clip_ratio/high_mean": 0.0008763158675719751, "clip_ratio/low_mean": 0.0005598667421509163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001436182632460259, "epoch": 6.471274424030329, "grad_norm": 0.1777404248714447, "learning_rate": 1e-06, "loss": -0.0395, "step": 1253 }, { "clip_ratio/high_max": 0.002106476815242786, "clip_ratio/high_mean": 0.000838238598589669, "clip_ratio/low_mean": 0.0005465806000302109, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001384819217491895, "epoch": 6.475940507436571, "grad_norm": 0.20361950993537903, "learning_rate": 1e-06, "loss": -0.067, "step": 1254 }, { "clip_ratio/high_max": 0.002371962858887855, "clip_ratio/high_mean": 0.0010376533973612823, "clip_ratio/low_mean": 0.0005475880329868232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015852413853281178, "epoch": 6.480606590842811, "grad_norm": 0.1775011420249939, "learning_rate": 1e-06, "loss": -0.0581, "step": 1255 }, { "clip_ratio/high_max": 0.002263595502881799, "clip_ratio/high_mean": 0.0010149803056265227, "clip_ratio/low_mean": 0.0006556017633556621, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016705820708011743, "epoch": 6.485272674249052, "grad_norm": 0.16694331169128418, "learning_rate": 1e-06, "loss": -0.0429, "step": 1256 }, { "clip_ratio/high_max": 0.0021845341616426595, "clip_ratio/high_mean": 0.0009564846168359509, "clip_ratio/low_mean": 0.0007550688642368186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00171155347925378, "epoch": 6.489938757655293, "grad_norm": 0.18367934226989746, "learning_rate": 1e-06, "loss": 0.0047, "step": 1257 }, { "clip_ratio/high_max": 0.0020965269177395385, "clip_ratio/high_mean": 0.0008520956016582204, "clip_ratio/low_mean": 0.0006599311291211052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015120267344173044, "epoch": 6.494604841061534, "grad_norm": 0.1605372279882431, "learning_rate": 1e-06, "loss": -0.0199, "step": 1258 }, { "clip_ratio/high_max": 0.002465185083565302, "clip_ratio/high_mean": 0.0009113260202866513, "clip_ratio/low_mean": 0.0007340760166698601, "clip_ratio/low_min": 5.369415885070339e-05, "clip_ratio/region_mean": 0.0016454020369565114, "epoch": 6.499270924467774, "grad_norm": 0.18609274923801422, "learning_rate": 1e-06, "loss": -0.029, "step": 1259 }, { "clip_ratio/high_max": 0.002885288467950886, "clip_ratio/high_mean": 0.001226793581736274, "clip_ratio/low_mean": 0.0005141270394233288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001740920626616571, "epoch": 6.503937007874016, "grad_norm": 0.17888261377811432, "learning_rate": 1e-06, "loss": -0.0992, "step": 1260 }, { "clip_ratio/high_max": 0.0023663549291086383, "clip_ratio/high_mean": 0.0010804844005178893, "clip_ratio/low_mean": 0.0007774941750540165, "clip_ratio/low_min": 1.991714452742599e-05, "clip_ratio/region_mean": 0.001857978517364245, "epoch": 6.508603091280257, "grad_norm": 0.24670886993408203, "learning_rate": 1e-06, "loss": 0.0081, "step": 1261 }, { "clip_ratio/high_max": 0.0024268515917356126, "clip_ratio/high_mean": 0.0009823406107898336, "clip_ratio/low_mean": 0.0008366631766421051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018190037553722505, "epoch": 6.5132691746864975, "grad_norm": 0.15875528752803802, "learning_rate": 1e-06, "loss": -0.0198, "step": 1262 }, { "clip_ratio/high_max": 0.0030257625112426467, "clip_ratio/high_mean": 0.0012849957784055732, "clip_ratio/low_mean": 0.0007378506988970912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020228464709362015, "epoch": 6.517935258092739, "grad_norm": 0.16234177350997925, "learning_rate": 1e-06, "loss": -0.0786, "step": 1263 }, { "clip_ratio/high_max": 0.0027808162121800706, "clip_ratio/high_mean": 0.0010067533930850914, "clip_ratio/low_mean": 0.00081771037002909, "clip_ratio/low_min": 2.89754279947374e-05, "clip_ratio/region_mean": 0.0018244637685711496, "epoch": 6.522601341498979, "grad_norm": 0.18712608516216278, "learning_rate": 1e-06, "loss": -0.0535, "step": 1264 }, { "clip_ratio/high_max": 0.005155796650797129, "clip_ratio/high_mean": 0.0021140287244634237, "clip_ratio/low_mean": 0.0021710636428906582, "clip_ratio/low_min": 7.776395068503916e-05, "clip_ratio/region_mean": 0.004285092378268018, "epoch": 6.5272674249052205, "grad_norm": 0.10155811905860901, "learning_rate": 1e-06, "loss": -0.0288, "step": 1265 }, { "clip_ratio/high_max": 0.004997641284717247, "clip_ratio/high_mean": 0.0019011046751984395, "clip_ratio/low_mean": 0.0018498868375900201, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003750991469132714, "epoch": 6.531933508311461, "grad_norm": 0.11834058165550232, "learning_rate": 1e-06, "loss": -0.014, "step": 1266 }, { "clip_ratio/high_max": 0.004403603772516362, "clip_ratio/high_mean": 0.001696750034170691, "clip_ratio/low_mean": 0.001828711505368119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003525461463141255, "epoch": 6.536599591717702, "grad_norm": 0.10455948114395142, "learning_rate": 1e-06, "loss": -0.0104, "step": 1267 }, { "clip_ratio/high_max": 0.00482768785150256, "clip_ratio/high_mean": 0.0018547420841059648, "clip_ratio/low_mean": 0.0019660325760924025, "clip_ratio/low_min": 5.827505810884759e-05, "clip_ratio/region_mean": 0.0038207746256375685, "epoch": 6.541265675123943, "grad_norm": 0.11500366777181625, "learning_rate": 1e-06, "loss": -0.0056, "step": 1268 }, { "clip_ratio/high_max": 0.005339832496247254, "clip_ratio/high_mean": 0.0020964208597433753, "clip_ratio/low_mean": 0.0015764073596074013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003672828242997639, "epoch": 6.545931758530184, "grad_norm": 0.16839399933815002, "learning_rate": 1e-06, "loss": -0.0403, "step": 1269 }, { "clip_ratio/high_max": 0.004849508506595157, "clip_ratio/high_mean": 0.002016175138123799, "clip_ratio/low_mean": 0.0018071316762870993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038233067170949653, "epoch": 6.550597841936424, "grad_norm": 0.12244345247745514, "learning_rate": 1e-06, "loss": -0.068, "step": 1270 }, { "clip_ratio/high_max": 0.005067257021437399, "clip_ratio/high_mean": 0.0022382496936188545, "clip_ratio/low_mean": 0.0017724037643347401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.004010653487057425, "epoch": 6.555263925342666, "grad_norm": 0.11108452826738358, "learning_rate": 1e-06, "loss": -0.0591, "step": 1271 }, { "clip_ratio/high_max": 0.00570140773197636, "clip_ratio/high_mean": 0.0021544756782532204, "clip_ratio/low_mean": 0.001804309693397954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003958785404392984, "epoch": 6.559930008748906, "grad_norm": 0.11429981887340546, "learning_rate": 1e-06, "loss": -0.0438, "step": 1272 }, { "clip_ratio/high_max": 0.00486232855473645, "clip_ratio/high_mean": 0.0020633556341635995, "clip_ratio/low_mean": 0.0021252667502267286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0041886223771143705, "epoch": 6.564596092155147, "grad_norm": 0.171039417386055, "learning_rate": 1e-06, "loss": 0.0037, "step": 1273 }, { "clip_ratio/high_max": 0.004511142098635901, "clip_ratio/high_mean": 0.001732706052280264, "clip_ratio/low_mean": 0.0017383242447976954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003471030227956362, "epoch": 6.569262175561388, "grad_norm": 0.10713984817266464, "learning_rate": 1e-06, "loss": -0.0207, "step": 1274 }, { "clip_ratio/high_max": 0.004778518385137431, "clip_ratio/high_mean": 0.0017622845261939801, "clip_ratio/low_mean": 0.0017426028571208008, "clip_ratio/low_min": 0.00010738831770140678, "clip_ratio/region_mean": 0.0035048873542109504, "epoch": 6.573928258967629, "grad_norm": 0.10906531661748886, "learning_rate": 1e-06, "loss": -0.0297, "step": 1275 }, { "clip_ratio/high_max": 0.005706370429834351, "clip_ratio/high_mean": 0.002383837396337185, "clip_ratio/low_mean": 0.001539257813419681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003923095064237714, "epoch": 6.57859434237387, "grad_norm": 0.10869808495044708, "learning_rate": 1e-06, "loss": -0.1001, "step": 1276 }, { "clip_ratio/high_max": 0.004778533839271404, "clip_ratio/high_mean": 0.001964031733223237, "clip_ratio/low_mean": 0.001914957661938388, "clip_ratio/low_min": 1.991714452742599e-05, "clip_ratio/region_mean": 0.0038789894024375826, "epoch": 6.583260425780111, "grad_norm": 0.11235576868057251, "learning_rate": 1e-06, "loss": 0.0071, "step": 1277 }, { "clip_ratio/high_max": 0.004232354753185064, "clip_ratio/high_mean": 0.001736542191792978, "clip_ratio/low_mean": 0.001948829783941619, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003685372030304279, "epoch": 6.587926509186351, "grad_norm": 0.1074599176645279, "learning_rate": 1e-06, "loss": -0.0206, "step": 1278 }, { "clip_ratio/high_max": 0.005401410424383357, "clip_ratio/high_mean": 0.002200720671680756, "clip_ratio/low_mean": 0.001666042415308766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003866763086989522, "epoch": 6.592592592592593, "grad_norm": 0.115765780210495, "learning_rate": 1e-06, "loss": -0.0796, "step": 1279 }, { "clip_ratio/high_max": 0.005033837980590761, "clip_ratio/high_mean": 0.0017954665381694213, "clip_ratio/low_mean": 0.0016746928176871734, "clip_ratio/low_min": 9.289076115237549e-05, "clip_ratio/region_mean": 0.0034701594413490966, "epoch": 6.597258675998834, "grad_norm": 0.11097413301467896, "learning_rate": 1e-06, "loss": -0.0543, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0845424107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 834.0260009765625, "completions/mean_terminated_length": 532.782958984375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 6.601924759405074, "grad_norm": 0.20398442447185516, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 371163879.0, "reward": 0.6556919813156128, "reward_std": 0.15002141892910004, "rewards/simpleverify_reward/mean": 0.6556919813156128, "rewards/simpleverify_reward/std": 0.4751586616039276, "step": 1281 }, { "clip_ratio/high_max": 0.002215487271314487, "clip_ratio/high_mean": 0.0007864549188525416, "clip_ratio/low_mean": 0.0004259348663708806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001212389764987165, "epoch": 6.606590842811316, "grad_norm": 0.15625602006912231, "learning_rate": 1e-06, "loss": -0.0263, "step": 1282 }, { "clip_ratio/high_max": 0.0023638328893866856, "clip_ratio/high_mean": 0.0008452327674604021, "clip_ratio/low_mean": 0.00041162919524140307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012568619931698777, "epoch": 6.611256926217556, "grad_norm": 0.16662487387657166, "learning_rate": 1e-06, "loss": -0.0606, "step": 1283 }, { "clip_ratio/high_max": 0.002677121628948953, "clip_ratio/high_mean": 0.000907784080482088, "clip_ratio/low_mean": 0.0004303069626985234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013380910531850532, "epoch": 6.615923009623797, "grad_norm": 0.1663932353258133, "learning_rate": 1e-06, "loss": -0.0634, "step": 1284 }, { "clip_ratio/high_max": 0.002282319521327736, "clip_ratio/high_mean": 0.0008145811334543396, "clip_ratio/low_mean": 0.0005402464821600006, "clip_ratio/low_min": 1.6204303392441943e-05, "clip_ratio/region_mean": 0.001354827585601015, "epoch": 6.620589093030038, "grad_norm": 0.17662297189235687, "learning_rate": 1e-06, "loss": -0.0471, "step": 1285 }, { "clip_ratio/high_max": 0.002419477932562586, "clip_ratio/high_mean": 0.0008632155258965213, "clip_ratio/low_mean": 0.0006132813959993655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014764969346288126, "epoch": 6.625255176436279, "grad_norm": 0.16501308977603912, "learning_rate": 1e-06, "loss": -0.0431, "step": 1286 }, { "clip_ratio/high_max": 0.002202585805207491, "clip_ratio/high_mean": 0.0008467680581816239, "clip_ratio/low_mean": 0.0006372148459377058, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001483982883655699, "epoch": 6.6299212598425195, "grad_norm": 0.19370634853839874, "learning_rate": 1e-06, "loss": -0.0171, "step": 1287 }, { "clip_ratio/high_max": 0.0025696512675494887, "clip_ratio/high_mean": 0.0009489596814091783, "clip_ratio/low_mean": 0.000713155426637968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001662115115323104, "epoch": 6.634587343248761, "grad_norm": 0.16496732831001282, "learning_rate": 1e-06, "loss": -0.0263, "step": 1288 }, { "clip_ratio/high_max": 0.0023115937801776454, "clip_ratio/high_mean": 0.0009358461966257892, "clip_ratio/low_mean": 0.0006549650615852443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001590811265486991, "epoch": 6.639253426655001, "grad_norm": 0.16607387363910675, "learning_rate": 1e-06, "loss": -0.0148, "step": 1289 }, { "clip_ratio/high_max": 0.0020615349858417176, "clip_ratio/high_mean": 0.0008180037402780727, "clip_ratio/low_mean": 0.0008169778120645788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016349815632565878, "epoch": 6.6439195100612425, "grad_norm": 0.1518193483352661, "learning_rate": 1e-06, "loss": -0.0078, "step": 1290 }, { "clip_ratio/high_max": 0.0027319323489791714, "clip_ratio/high_mean": 0.0011226893511775415, "clip_ratio/low_mean": 0.0007116372271411819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001834326598327607, "epoch": 6.648585593467483, "grad_norm": 0.1648380607366562, "learning_rate": 1e-06, "loss": -0.0338, "step": 1291 }, { "clip_ratio/high_max": 0.002710820783249801, "clip_ratio/high_mean": 0.0009817145200941013, "clip_ratio/low_mean": 0.0009063977213372709, "clip_ratio/low_min": 1.6037978639360517e-05, "clip_ratio/region_mean": 0.0018881122014136054, "epoch": 6.653251676873724, "grad_norm": 0.16792446374893188, "learning_rate": 1e-06, "loss": -0.0176, "step": 1292 }, { "clip_ratio/high_max": 0.002489826343662571, "clip_ratio/high_mean": 0.0009659428797021974, "clip_ratio/low_mean": 0.0007873951799410861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017533380560053047, "epoch": 6.657917760279965, "grad_norm": 0.16345618665218353, "learning_rate": 1e-06, "loss": -0.0651, "step": 1293 }, { "clip_ratio/high_max": 0.0024271590664284304, "clip_ratio/high_mean": 0.000883765984326601, "clip_ratio/low_mean": 0.0010309950848750304, "clip_ratio/low_min": 7.438704778905958e-05, "clip_ratio/region_mean": 0.0019147610437357798, "epoch": 6.662583843686206, "grad_norm": 0.18774068355560303, "learning_rate": 1e-06, "loss": -0.006, "step": 1294 }, { "clip_ratio/high_max": 0.0030691239226143807, "clip_ratio/high_mean": 0.0011352186629665084, "clip_ratio/low_mean": 0.0008994620147859678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020346806923043914, "epoch": 6.667249927092447, "grad_norm": 0.1552181988954544, "learning_rate": 1e-06, "loss": -0.0288, "step": 1295 }, { "clip_ratio/high_max": 0.00267529708071379, "clip_ratio/high_mean": 0.0010187566240347223, "clip_ratio/low_mean": 0.0009265052731279866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001945261872606352, "epoch": 6.671916010498688, "grad_norm": 0.18326842784881592, "learning_rate": 1e-06, "loss": -0.0284, "step": 1296 }, { "clip_ratio/high_max": 0.005656683992128819, "clip_ratio/high_mean": 0.0024527592868253123, "clip_ratio/low_mean": 0.0019266367798991269, "clip_ratio/low_min": 0.00011856878518301528, "clip_ratio/region_mean": 0.004379396152216941, "epoch": 6.676582093904928, "grad_norm": 0.10701899230480194, "learning_rate": 1e-06, "loss": -0.0464, "step": 1297 }, { "clip_ratio/high_max": 0.005006497594877146, "clip_ratio/high_mean": 0.0017055287426046561, "clip_ratio/low_mean": 0.001643291503569344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003348820246174, "epoch": 6.681248177311169, "grad_norm": 0.11276061832904816, "learning_rate": 1e-06, "loss": -0.027, "step": 1298 }, { "clip_ratio/high_max": 0.004742298056953587, "clip_ratio/high_mean": 0.0018760728344204836, "clip_ratio/low_mean": 0.0014991972202551551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033752700401237234, "epoch": 6.685914260717411, "grad_norm": 0.1114305928349495, "learning_rate": 1e-06, "loss": -0.0615, "step": 1299 }, { "clip_ratio/high_max": 0.004631867494026665, "clip_ratio/high_mean": 0.0018172040581703186, "clip_ratio/low_mean": 0.001295792108066962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003112996091658715, "epoch": 6.690580344123651, "grad_norm": 0.11501974612474442, "learning_rate": 1e-06, "loss": -0.0641, "step": 1300 }, { "clip_ratio/high_max": 0.005480855506903026, "clip_ratio/high_mean": 0.0018107604737451766, "clip_ratio/low_mean": 0.0016421642758359667, "clip_ratio/low_min": 7.706534961471334e-05, "clip_ratio/region_mean": 0.0034529247423051856, "epoch": 6.695246427529892, "grad_norm": 0.11503326892852783, "learning_rate": 1e-06, "loss": -0.048, "step": 1301 }, { "clip_ratio/high_max": 0.004888847732217982, "clip_ratio/high_mean": 0.0018378643617324997, "clip_ratio/low_mean": 0.0017399261523678433, "clip_ratio/low_min": 1.7313019270659424e-05, "clip_ratio/region_mean": 0.003577790455892682, "epoch": 6.699912510936133, "grad_norm": 0.11644850671291351, "learning_rate": 1e-06, "loss": -0.044, "step": 1302 }, { "clip_ratio/high_max": 0.005241201331955381, "clip_ratio/high_mean": 0.001978694406716386, "clip_ratio/low_mean": 0.0018992362784047145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003877930750604719, "epoch": 6.704578594342374, "grad_norm": 0.11634170264005661, "learning_rate": 1e-06, "loss": -0.0183, "step": 1303 }, { "clip_ratio/high_max": 0.005474980731378309, "clip_ratio/high_mean": 0.0019617874713731, "clip_ratio/low_mean": 0.0017697162311378634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003731503675226122, "epoch": 6.7092446777486145, "grad_norm": 0.10746832937002182, "learning_rate": 1e-06, "loss": -0.0271, "step": 1304 }, { "clip_ratio/high_max": 0.00486699357861653, "clip_ratio/high_mean": 0.001828067619499052, "clip_ratio/low_mean": 0.0017811469497246435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003609214625612367, "epoch": 6.713910761154856, "grad_norm": 0.10577815771102905, "learning_rate": 1e-06, "loss": -0.0157, "step": 1305 }, { "clip_ratio/high_max": 0.0037362330112955533, "clip_ratio/high_mean": 0.0014327140743262134, "clip_ratio/low_mean": 0.0019113114540232345, "clip_ratio/low_min": 6.948304508114234e-05, "clip_ratio/region_mean": 0.003344025506521575, "epoch": 6.718576844561096, "grad_norm": 0.09911538660526276, "learning_rate": 1e-06, "loss": -0.0085, "step": 1306 }, { "clip_ratio/high_max": 0.005705976247554645, "clip_ratio/high_mean": 0.0021988089283695444, "clip_ratio/low_mean": 0.0016976686129055452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038964774721534923, "epoch": 6.723242927967338, "grad_norm": 0.10936553031206131, "learning_rate": 1e-06, "loss": -0.0347, "step": 1307 }, { "clip_ratio/high_max": 0.004975000294507481, "clip_ratio/high_mean": 0.0018125532697013114, "clip_ratio/low_mean": 0.002116371295414865, "clip_ratio/low_min": 9.434979438083246e-05, "clip_ratio/region_mean": 0.003928924517822452, "epoch": 6.727909011373578, "grad_norm": 0.11729011684656143, "learning_rate": 1e-06, "loss": -0.0185, "step": 1308 }, { "clip_ratio/high_max": 0.00514453426876571, "clip_ratio/high_mean": 0.0019941599275625776, "clip_ratio/low_mean": 0.001806404394301353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003800564241828397, "epoch": 6.732575094779819, "grad_norm": 0.11275199055671692, "learning_rate": 1e-06, "loss": -0.0661, "step": 1309 }, { "clip_ratio/high_max": 0.004617457329004537, "clip_ratio/high_mean": 0.0016860247269505635, "clip_ratio/low_mean": 0.002243429706140887, "clip_ratio/low_min": 0.0002863877161871642, "clip_ratio/region_mean": 0.003929454382159747, "epoch": 6.73724117818606, "grad_norm": 0.11298539489507675, "learning_rate": 1e-06, "loss": -0.007, "step": 1310 }, { "clip_ratio/high_max": 0.004866485498496331, "clip_ratio/high_mean": 0.0019647844710561913, "clip_ratio/low_mean": 0.001932533371473255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038973178016021848, "epoch": 6.741907261592301, "grad_norm": 0.10804520547389984, "learning_rate": 1e-06, "loss": -0.0297, "step": 1311 }, { "clip_ratio/high_max": 0.005027879757108167, "clip_ratio/high_mean": 0.0018853389883588534, "clip_ratio/low_mean": 0.002026876427407842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003912215426680632, "epoch": 6.746573344998541, "grad_norm": 0.11780452728271484, "learning_rate": 1e-06, "loss": -0.0294, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0920061383928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 875.3718872070312, "completions/mean_terminated_length": 549.0287475585938, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 6.751239428404783, "grad_norm": 0.17691953480243683, "learning_rate": 1e-06, "loss": -0.0489, "num_tokens": 379747162.0, "reward": 0.6428571939468384, "reward_std": 0.1478021889925003, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4791741371154785, "step": 1313 }, { "clip_ratio/high_max": 0.001906260171381291, "clip_ratio/high_mean": 0.0007849453104427084, "clip_ratio/low_mean": 0.00043045746815550956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012154028008808382, "epoch": 6.755905511811024, "grad_norm": 0.15455153584480286, "learning_rate": 1e-06, "loss": -0.0425, "step": 1314 }, { "clip_ratio/high_max": 0.001524897052149754, "clip_ratio/high_mean": 0.0006239451231522253, "clip_ratio/low_mean": 0.00047506844839517726, "clip_ratio/low_min": 1.5574383724015206e-05, "clip_ratio/region_mean": 0.0010990135742758866, "epoch": 6.7605715952172645, "grad_norm": 0.1451701521873474, "learning_rate": 1e-06, "loss": -0.0424, "step": 1315 }, { "clip_ratio/high_max": 0.0017683661353657953, "clip_ratio/high_mean": 0.0006558477889484493, "clip_ratio/low_mean": 0.00047970484774850775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011355526330589782, "epoch": 6.765237678623506, "grad_norm": 0.17063352465629578, "learning_rate": 1e-06, "loss": -0.0305, "step": 1316 }, { "clip_ratio/high_max": 0.0021927087000221945, "clip_ratio/high_mean": 0.0008543516833015019, "clip_ratio/low_mean": 0.00045433903778757667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001308690738369478, "epoch": 6.769903762029746, "grad_norm": 0.15368495881557465, "learning_rate": 1e-06, "loss": -0.0733, "step": 1317 }, { "clip_ratio/high_max": 0.002204972661274951, "clip_ratio/high_mean": 0.0008919720303310896, "clip_ratio/low_mean": 0.0006047590723028407, "clip_ratio/low_min": 5.2741667786904145e-05, "clip_ratio/region_mean": 0.0014967311253712978, "epoch": 6.7745698454359875, "grad_norm": 0.18146827816963196, "learning_rate": 1e-06, "loss": -0.0481, "step": 1318 }, { "clip_ratio/high_max": 0.001990661730815191, "clip_ratio/high_mean": 0.000726216256225598, "clip_ratio/low_mean": 0.0005277826821838971, "clip_ratio/low_min": 1.663561306486372e-05, "clip_ratio/region_mean": 0.001253998969332315, "epoch": 6.779235928842228, "grad_norm": 0.1759994775056839, "learning_rate": 1e-06, "loss": -0.0289, "step": 1319 }, { "clip_ratio/high_max": 0.0016953256672422867, "clip_ratio/high_mean": 0.0007611136843479471, "clip_ratio/low_mean": 0.0005802895593660651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013414032146101817, "epoch": 6.783902012248469, "grad_norm": 0.14383743703365326, "learning_rate": 1e-06, "loss": -0.0263, "step": 1320 }, { "clip_ratio/high_max": 0.0019516379761626013, "clip_ratio/high_mean": 0.0007484894540539244, "clip_ratio/low_mean": 0.0006153910271677887, "clip_ratio/low_min": 1.2173743925814051e-05, "clip_ratio/region_mean": 0.0013638804775837343, "epoch": 6.78856809565471, "grad_norm": 0.1594594120979309, "learning_rate": 1e-06, "loss": -0.0515, "step": 1321 }, { "clip_ratio/high_max": 0.001825968316552462, "clip_ratio/high_mean": 0.0007441487150572357, "clip_ratio/low_mean": 0.0005924970091655268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013366457205847837, "epoch": 6.793234179060951, "grad_norm": 0.16743826866149902, "learning_rate": 1e-06, "loss": -0.0423, "step": 1322 }, { "clip_ratio/high_max": 0.0022548476554220542, "clip_ratio/high_mean": 0.0009680027742433595, "clip_ratio/low_mean": 0.0006425620831578271, "clip_ratio/low_min": 2.6310250177630223e-05, "clip_ratio/region_mean": 0.0016105648646771442, "epoch": 6.797900262467191, "grad_norm": 0.16004908084869385, "learning_rate": 1e-06, "loss": -0.0746, "step": 1323 }, { "clip_ratio/high_max": 0.0022688432363793254, "clip_ratio/high_mean": 0.0009288006385759218, "clip_ratio/low_mean": 0.0006257376398934866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001554538252094062, "epoch": 6.802566345873433, "grad_norm": 0.18003647029399872, "learning_rate": 1e-06, "loss": -0.0203, "step": 1324 }, { "clip_ratio/high_max": 0.002170684554585023, "clip_ratio/high_mean": 0.0008112687664834084, "clip_ratio/low_mean": 0.0007583111546409782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015695799447712488, "epoch": 6.807232429279673, "grad_norm": 0.16193418204784393, "learning_rate": 1e-06, "loss": -0.0116, "step": 1325 }, { "clip_ratio/high_max": 0.0020727996015921235, "clip_ratio/high_mean": 0.0008878374073901796, "clip_ratio/low_mean": 0.0006438633945435868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015317008037527557, "epoch": 6.811898512685914, "grad_norm": 0.16347189247608185, "learning_rate": 1e-06, "loss": -0.0395, "step": 1326 }, { "clip_ratio/high_max": 0.002393558643234428, "clip_ratio/high_mean": 0.0009384025979670696, "clip_ratio/low_mean": 0.0007500589053961448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016884614888112992, "epoch": 6.816564596092155, "grad_norm": 0.15207186341285706, "learning_rate": 1e-06, "loss": -0.043, "step": 1327 }, { "clip_ratio/high_max": 0.0025760254211490974, "clip_ratio/high_mean": 0.0010946907386824023, "clip_ratio/low_mean": 0.0008016890255930775, "clip_ratio/low_min": 6.64626459183637e-05, "clip_ratio/region_mean": 0.0018963798211188987, "epoch": 6.821230679498396, "grad_norm": 0.16872480511665344, "learning_rate": 1e-06, "loss": -0.0435, "step": 1328 }, { "clip_ratio/high_max": 0.006034898586221971, "clip_ratio/high_mean": 0.0022732440993422642, "clip_ratio/low_mean": 0.001452861906727776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037261059042066336, "epoch": 6.8258967629046365, "grad_norm": 0.09486385434865952, "learning_rate": 1e-06, "loss": -0.0497, "step": 1329 }, { "clip_ratio/high_max": 0.0044036493709427305, "clip_ratio/high_mean": 0.001887764796265401, "clip_ratio/low_mean": 0.0015684372774558142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003456202073721215, "epoch": 6.830562846310878, "grad_norm": 0.11497434973716736, "learning_rate": 1e-06, "loss": -0.0433, "step": 1330 }, { "clip_ratio/high_max": 0.004667037501349114, "clip_ratio/high_mean": 0.001613572116184514, "clip_ratio/low_mean": 0.001647188651986653, "clip_ratio/low_min": 0.00018259330681758001, "clip_ratio/region_mean": 0.0032607608009129763, "epoch": 6.835228929717118, "grad_norm": 0.1100429967045784, "learning_rate": 1e-06, "loss": -0.0431, "step": 1331 }, { "clip_ratio/high_max": 0.004490446168347262, "clip_ratio/high_mean": 0.0017626873268454801, "clip_ratio/low_mean": 0.0017411917870049365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003503879110212438, "epoch": 6.83989501312336, "grad_norm": 0.11178852617740631, "learning_rate": 1e-06, "loss": -0.0314, "step": 1332 }, { "clip_ratio/high_max": 0.0048140449725906365, "clip_ratio/high_mean": 0.0019473693137115333, "clip_ratio/low_mean": 0.0013984386205265764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033458078833064064, "epoch": 6.844561096529601, "grad_norm": 0.09910988062620163, "learning_rate": 1e-06, "loss": -0.074, "step": 1333 }, { "clip_ratio/high_max": 0.004908949893433601, "clip_ratio/high_mean": 0.0018512294846004806, "clip_ratio/low_mean": 0.0017365465391776524, "clip_ratio/low_min": 0.00014344750525197014, "clip_ratio/region_mean": 0.0035877760383300483, "epoch": 6.849227179935841, "grad_norm": 0.10603483766317368, "learning_rate": 1e-06, "loss": -0.0491, "step": 1334 }, { "clip_ratio/high_max": 0.0044283151073614135, "clip_ratio/high_mean": 0.0017259447886317503, "clip_ratio/low_mean": 0.0018376313601038419, "clip_ratio/low_min": 0.00011309658293612301, "clip_ratio/region_mean": 0.0035635762324091047, "epoch": 6.853893263342083, "grad_norm": 0.10247107595205307, "learning_rate": 1e-06, "loss": -0.0298, "step": 1335 }, { "clip_ratio/high_max": 0.004174747125944123, "clip_ratio/high_mean": 0.0016834230918902904, "clip_ratio/low_mean": 0.0015430074236064684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032264305191347376, "epoch": 6.858559346748323, "grad_norm": 0.10308176279067993, "learning_rate": 1e-06, "loss": -0.027, "step": 1336 }, { "clip_ratio/high_max": 0.004295174905564636, "clip_ratio/high_mean": 0.0016767433189670555, "clip_ratio/low_mean": 0.0016643594535707962, "clip_ratio/low_min": 7.304246537387371e-05, "clip_ratio/region_mean": 0.0033411026961402968, "epoch": 6.863225430154564, "grad_norm": 0.09953628480434418, "learning_rate": 1e-06, "loss": -0.0523, "step": 1337 }, { "clip_ratio/high_max": 0.004616234728018753, "clip_ratio/high_mean": 0.0016249923919531284, "clip_ratio/low_mean": 0.0014194940486049745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030444864605669864, "epoch": 6.867891513560805, "grad_norm": 0.09776140004396439, "learning_rate": 1e-06, "loss": -0.0431, "step": 1338 }, { "clip_ratio/high_max": 0.00528674409724772, "clip_ratio/high_mean": 0.0019236216685385443, "clip_ratio/low_mean": 0.0017211059221153846, "clip_ratio/low_min": 4.8744346713647246e-05, "clip_ratio/region_mean": 0.0036447275488171726, "epoch": 6.872557596967046, "grad_norm": 0.11920148879289627, "learning_rate": 1e-06, "loss": -0.0756, "step": 1339 }, { "clip_ratio/high_max": 0.004785435143276118, "clip_ratio/high_mean": 0.0019439743300608825, "clip_ratio/low_mean": 0.0019317464757477865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003875720751238987, "epoch": 6.8772236803732865, "grad_norm": 0.12807448208332062, "learning_rate": 1e-06, "loss": -0.0214, "step": 1340 }, { "clip_ratio/high_max": 0.0038489385187858716, "clip_ratio/high_mean": 0.0015965677812346257, "clip_ratio/low_mean": 0.001979966225917451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035765339416684583, "epoch": 6.881889763779528, "grad_norm": 0.10300271958112717, "learning_rate": 1e-06, "loss": -0.0124, "step": 1341 }, { "clip_ratio/high_max": 0.0043094366701552644, "clip_ratio/high_mean": 0.001805086252716137, "clip_ratio/low_mean": 0.0016290790117636789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034341652935836464, "epoch": 6.886555847185768, "grad_norm": 0.10691041499376297, "learning_rate": 1e-06, "loss": -0.0404, "step": 1342 }, { "clip_ratio/high_max": 0.004835788655327633, "clip_ratio/high_mean": 0.0018454608316460508, "clip_ratio/low_mean": 0.0016319052192557137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034773660299833864, "epoch": 6.8912219305920095, "grad_norm": 0.1092439815402031, "learning_rate": 1e-06, "loss": -0.0438, "step": 1343 }, { "clip_ratio/high_max": 0.0047408887767232955, "clip_ratio/high_mean": 0.00193258942454122, "clip_ratio/low_mean": 0.0018805811305355746, "clip_ratio/low_min": 9.941027383320034e-05, "clip_ratio/region_mean": 0.0038131705223349854, "epoch": 6.89588801399825, "grad_norm": 0.11647233366966248, "learning_rate": 1e-06, "loss": -0.0445, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0899832589285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 863.3488159179688, "completions/mean_terminated_length": 543.7013549804688, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 7.004666083406241, "grad_norm": 0.17813444137573242, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 388280834.0, "reward": 0.6662248969078064, "reward_std": 0.14652320742607117, "rewards/simpleverify_reward/mean": 0.6662248969078064, "rewards/simpleverify_reward/std": 0.4715769290924072, "step": 1345 }, { "clip_ratio/high_max": 0.00196512307957164, "clip_ratio/high_mean": 0.0007401004359053331, "clip_ratio/low_mean": 0.0003890584271175612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011291588671156205, "epoch": 7.009332166812482, "grad_norm": 0.1573495864868164, "learning_rate": 1e-06, "loss": -0.057, "step": 1346 }, { "clip_ratio/high_max": 0.0022226737237360794, "clip_ratio/high_mean": 0.0008395700351684354, "clip_ratio/low_mean": 0.00041905999614755274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012586300326802302, "epoch": 7.013998250218723, "grad_norm": 0.16541199386119843, "learning_rate": 1e-06, "loss": -0.063, "step": 1347 }, { "clip_ratio/high_max": 0.002010684343986213, "clip_ratio/high_mean": 0.0008371070343855536, "clip_ratio/low_mean": 0.00048016077971624327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00131726781182806, "epoch": 7.0186643336249634, "grad_norm": 0.15292195975780487, "learning_rate": 1e-06, "loss": -0.05, "step": 1348 }, { "clip_ratio/high_max": 0.0018231259600725025, "clip_ratio/high_mean": 0.0007352385819103802, "clip_ratio/low_mean": 0.0005110916599733173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001246330237336224, "epoch": 7.023330417031205, "grad_norm": 0.1643696278333664, "learning_rate": 1e-06, "loss": -0.0207, "step": 1349 }, { "clip_ratio/high_max": 0.002042754211288411, "clip_ratio/high_mean": 0.0008114329157251632, "clip_ratio/low_mean": 0.0005021154647693038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013135483568476047, "epoch": 7.027996500437445, "grad_norm": 0.16176468133926392, "learning_rate": 1e-06, "loss": -0.0233, "step": 1350 }, { "clip_ratio/high_max": 0.0025353020973852836, "clip_ratio/high_mean": 0.0009854450399870984, "clip_ratio/low_mean": 0.0005678753614120069, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015533203804807272, "epoch": 7.0326625838436865, "grad_norm": 0.1666657030582428, "learning_rate": 1e-06, "loss": -0.0372, "step": 1351 }, { "clip_ratio/high_max": 0.0022654654894722626, "clip_ratio/high_mean": 0.0009013194921863033, "clip_ratio/low_mean": 0.000599218793468026, "clip_ratio/low_min": 2.0620256691472605e-05, "clip_ratio/region_mean": 0.0015005382920207921, "epoch": 7.037328667249927, "grad_norm": 0.15766793489456177, "learning_rate": 1e-06, "loss": -0.0508, "step": 1352 }, { "clip_ratio/high_max": 0.0025432959155295976, "clip_ratio/high_mean": 0.0009783390960365068, "clip_ratio/low_mean": 0.000559994508876116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015383336030936334, "epoch": 7.041994750656168, "grad_norm": 0.32127246260643005, "learning_rate": 1e-06, "loss": -0.048, "step": 1353 }, { "clip_ratio/high_max": 0.0019405158891458996, "clip_ratio/high_mean": 0.0008728833381610457, "clip_ratio/low_mean": 0.0006260184018174186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014989017327025067, "epoch": 7.046660834062409, "grad_norm": 0.15023750066757202, "learning_rate": 1e-06, "loss": -0.0504, "step": 1354 }, { "clip_ratio/high_max": 0.0020431004886631854, "clip_ratio/high_mean": 0.0008276506696347496, "clip_ratio/low_mean": 0.0006937142552487785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001521364953077864, "epoch": 7.05132691746865, "grad_norm": 0.1613745391368866, "learning_rate": 1e-06, "loss": 0.0219, "step": 1355 }, { "clip_ratio/high_max": 0.0024921642507251818, "clip_ratio/high_mean": 0.0009526120193186216, "clip_ratio/low_mean": 0.0005702993935301492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001522911414213013, "epoch": 7.05599300087489, "grad_norm": 0.15816907584667206, "learning_rate": 1e-06, "loss": -0.0929, "step": 1356 }, { "clip_ratio/high_max": 0.0027314904873492196, "clip_ratio/high_mean": 0.0010463655180501519, "clip_ratio/low_mean": 0.0007975598318807897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001843925376306288, "epoch": 7.060659084281132, "grad_norm": 0.1678362488746643, "learning_rate": 1e-06, "loss": -0.0132, "step": 1357 }, { "clip_ratio/high_max": 0.0022472815253422596, "clip_ratio/high_mean": 0.0009136543012573384, "clip_ratio/low_mean": 0.0006535810296099953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015672353511035908, "epoch": 7.065325167687372, "grad_norm": 0.15273858606815338, "learning_rate": 1e-06, "loss": -0.0507, "step": 1358 }, { "clip_ratio/high_max": 0.002006586561037693, "clip_ratio/high_mean": 0.0008285951644211309, "clip_ratio/low_mean": 0.000700228871210129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015288240610971116, "epoch": 7.069991251093613, "grad_norm": 0.15688160061836243, "learning_rate": 1e-06, "loss": -0.0464, "step": 1359 }, { "clip_ratio/high_max": 0.0027816422007163055, "clip_ratio/high_mean": 0.0010781456767290365, "clip_ratio/low_mean": 0.0008850695403452846, "clip_ratio/low_min": 9.020498328027315e-05, "clip_ratio/region_mean": 0.0019632151888799854, "epoch": 7.074657334499854, "grad_norm": 0.16717086732387543, "learning_rate": 1e-06, "loss": -0.0585, "step": 1360 }, { "clip_ratio/high_max": 0.005641895477310754, "clip_ratio/high_mean": 0.002011078944633482, "clip_ratio/low_mean": 0.002089990914100781, "clip_ratio/low_min": 3.89650886063464e-05, "clip_ratio/region_mean": 0.004101069891476072, "epoch": 7.079323417906095, "grad_norm": 0.09761442989110947, "learning_rate": 1e-06, "loss": -0.0049, "step": 1361 }, { "clip_ratio/high_max": 0.004146211882471107, "clip_ratio/high_mean": 0.0018958972541440744, "clip_ratio/low_mean": 0.0015333277688114322, "clip_ratio/low_min": 1.711859840725083e-05, "clip_ratio/region_mean": 0.0034292250784346834, "epoch": 7.083989501312336, "grad_norm": 0.10227840393781662, "learning_rate": 1e-06, "loss": -0.0578, "step": 1362 }, { "clip_ratio/high_max": 0.0043850333750015125, "clip_ratio/high_mean": 0.001878627903352026, "clip_ratio/low_mean": 0.001450759133149404, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00332938703650143, "epoch": 7.088655584718577, "grad_norm": 0.11053407192230225, "learning_rate": 1e-06, "loss": -0.0638, "step": 1363 }, { "clip_ratio/high_max": 0.004361015395261347, "clip_ratio/high_mean": 0.0019230335019528866, "clip_ratio/low_mean": 0.0016113154924823903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035343490162631497, "epoch": 7.093321668124818, "grad_norm": 0.10813720524311066, "learning_rate": 1e-06, "loss": -0.0507, "step": 1364 }, { "clip_ratio/high_max": 0.004132738540647551, "clip_ratio/high_mean": 0.0015850717500143219, "clip_ratio/low_mean": 0.0017067740809579846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032918459037318826, "epoch": 7.0979877515310585, "grad_norm": 0.11003698408603668, "learning_rate": 1e-06, "loss": -0.0216, "step": 1365 }, { "clip_ratio/high_max": 0.004409364344610367, "clip_ratio/high_mean": 0.0016792884925962426, "clip_ratio/low_mean": 0.001629454465728486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033087429619627073, "epoch": 7.1026538349373, "grad_norm": 0.10226931422948837, "learning_rate": 1e-06, "loss": -0.0241, "step": 1366 }, { "clip_ratio/high_max": 0.005147559306351468, "clip_ratio/high_mean": 0.001902114148833789, "clip_ratio/low_mean": 0.0015262373508448945, "clip_ratio/low_min": 5.9222995332675055e-05, "clip_ratio/region_mean": 0.00342835157061927, "epoch": 7.10731991834354, "grad_norm": 0.11141891777515411, "learning_rate": 1e-06, "loss": -0.038, "step": 1367 }, { "clip_ratio/high_max": 0.004734716727398336, "clip_ratio/high_mean": 0.0019135097281832714, "clip_ratio/low_mean": 0.0016106326511362568, "clip_ratio/low_min": 0.00010110577932209708, "clip_ratio/region_mean": 0.0035241423174738884, "epoch": 7.111986001749782, "grad_norm": 0.1046186313033104, "learning_rate": 1e-06, "loss": -0.0517, "step": 1368 }, { "clip_ratio/high_max": 0.005908478982746601, "clip_ratio/high_mean": 0.002036659760051407, "clip_ratio/low_mean": 0.0017616262812225614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037982860521879047, "epoch": 7.116652085156022, "grad_norm": 0.13145965337753296, "learning_rate": 1e-06, "loss": -0.0492, "step": 1369 }, { "clip_ratio/high_max": 0.004432545145391487, "clip_ratio/high_mean": 0.0017289940151385963, "clip_ratio/low_mean": 0.0013501349712896626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003079129040997941, "epoch": 7.121318168562263, "grad_norm": 0.10737859457731247, "learning_rate": 1e-06, "loss": -0.0511, "step": 1370 }, { "clip_ratio/high_max": 0.003907241902197711, "clip_ratio/high_mean": 0.0017393593007000163, "clip_ratio/low_mean": 0.0018372789018030744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00357663808244979, "epoch": 7.125984251968504, "grad_norm": 0.12362054735422134, "learning_rate": 1e-06, "loss": 0.021, "step": 1371 }, { "clip_ratio/high_max": 0.0046059713495196775, "clip_ratio/high_mean": 0.0018688447016756982, "clip_ratio/low_mean": 0.0014330796243484656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003301924305560533, "epoch": 7.130650335374745, "grad_norm": 0.11188887804746628, "learning_rate": 1e-06, "loss": -0.0938, "step": 1372 }, { "clip_ratio/high_max": 0.0047178358800010756, "clip_ratio/high_mean": 0.001794469528249465, "clip_ratio/low_mean": 0.0019175440938852262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037120135966688395, "epoch": 7.135316418780985, "grad_norm": 0.1485195755958557, "learning_rate": 1e-06, "loss": -0.0141, "step": 1373 }, { "clip_ratio/high_max": 0.004903913533780724, "clip_ratio/high_mean": 0.0019435995709500276, "clip_ratio/low_mean": 0.001473207804338017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034168073907494545, "epoch": 7.139982502187227, "grad_norm": 0.10329054296016693, "learning_rate": 1e-06, "loss": -0.0515, "step": 1374 }, { "clip_ratio/high_max": 0.003859941331029404, "clip_ratio/high_mean": 0.0016578102586208843, "clip_ratio/low_mean": 0.001723356243019225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033811665780376643, "epoch": 7.144648585593467, "grad_norm": 0.11751967668533325, "learning_rate": 1e-06, "loss": -0.0473, "step": 1375 }, { "clip_ratio/high_max": 0.005230279944953509, "clip_ratio/high_mean": 0.0018768908958008979, "clip_ratio/low_mean": 0.001889971495984355, "clip_ratio/low_min": 0.00016869320097612217, "clip_ratio/region_mean": 0.003766862311749719, "epoch": 7.1493146689997085, "grad_norm": 0.10344547033309937, "learning_rate": 1e-06, "loss": -0.0594, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0876116071428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 855.8145751953125, "completions/mean_terminated_length": 544.6774291992188, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 7.153980752405949, "grad_norm": 0.1809016764163971, "learning_rate": 1e-06, "loss": -0.0312, "num_tokens": 396821783.0, "reward": 0.6712472438812256, "reward_std": 0.15039551258087158, "rewards/simpleverify_reward/mean": 0.6712471842765808, "rewards/simpleverify_reward/std": 0.46977630257606506, "step": 1377 }, { "clip_ratio/high_max": 0.0018236126634292305, "clip_ratio/high_mean": 0.0007716597483522492, "clip_ratio/low_mean": 0.0004701726845723897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001241832444065949, "epoch": 7.15864683581219, "grad_norm": 0.17265312373638153, "learning_rate": 1e-06, "loss": -0.0118, "step": 1378 }, { "clip_ratio/high_max": 0.0018742871543508954, "clip_ratio/high_mean": 0.0008148845290634199, "clip_ratio/low_mean": 0.0004923002179566538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013071847533865366, "epoch": 7.163312919218431, "grad_norm": 0.18306361138820648, "learning_rate": 1e-06, "loss": -0.0475, "step": 1379 }, { "clip_ratio/high_max": 0.0020929436868755147, "clip_ratio/high_mean": 0.000829510898256558, "clip_ratio/low_mean": 0.0005188782524783164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013483892034855671, "epoch": 7.167979002624672, "grad_norm": 0.16039815545082092, "learning_rate": 1e-06, "loss": -0.0348, "step": 1380 }, { "clip_ratio/high_max": 0.002377553861151682, "clip_ratio/high_mean": 0.0009278282250306802, "clip_ratio/low_mean": 0.0005433923106465954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014712205666000955, "epoch": 7.172645086030913, "grad_norm": 0.1640148013830185, "learning_rate": 1e-06, "loss": -0.0759, "step": 1381 }, { "clip_ratio/high_max": 0.0019394484988879412, "clip_ratio/high_mean": 0.0008216792430175701, "clip_ratio/low_mean": 0.0005633455002680421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013850247487425804, "epoch": 7.177311169437154, "grad_norm": 0.18771474063396454, "learning_rate": 1e-06, "loss": -0.0171, "step": 1382 }, { "clip_ratio/high_max": 0.002581395805464126, "clip_ratio/high_mean": 0.0010066006580018438, "clip_ratio/low_mean": 0.0005797643070764025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015863649787206668, "epoch": 7.181977252843395, "grad_norm": 0.15857377648353577, "learning_rate": 1e-06, "loss": -0.0421, "step": 1383 }, { "clip_ratio/high_max": 0.002351253788219765, "clip_ratio/high_mean": 0.000866480566401151, "clip_ratio/low_mean": 0.0005698717595805647, "clip_ratio/low_min": 1.1390559848223347e-05, "clip_ratio/region_mean": 0.0014363523296196945, "epoch": 7.186643336249635, "grad_norm": 0.18740224838256836, "learning_rate": 1e-06, "loss": -0.0322, "step": 1384 }, { "clip_ratio/high_max": 0.0022295348317129537, "clip_ratio/high_mean": 0.0007747080489934888, "clip_ratio/low_mean": 0.0006551564933943155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014298645219241735, "epoch": 7.191309419655877, "grad_norm": 0.5735350251197815, "learning_rate": 1e-06, "loss": 0.0, "step": 1385 }, { "clip_ratio/high_max": 0.002230768157460261, "clip_ratio/high_mean": 0.0008987396358861588, "clip_ratio/low_mean": 0.0006252862276596716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015240258289850317, "epoch": 7.195975503062117, "grad_norm": 0.17041385173797607, "learning_rate": 1e-06, "loss": -0.0216, "step": 1386 }, { "clip_ratio/high_max": 0.0024237356992671266, "clip_ratio/high_mean": 0.001005453639663756, "clip_ratio/low_mean": 0.0006922833144926699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016977369741653092, "epoch": 7.200641586468358, "grad_norm": 0.30392420291900635, "learning_rate": 1e-06, "loss": -0.0452, "step": 1387 }, { "clip_ratio/high_max": 0.002598405546450522, "clip_ratio/high_mean": 0.0011434978987381328, "clip_ratio/low_mean": 0.0006337152381092892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017772130813682452, "epoch": 7.205307669874599, "grad_norm": 0.16137278079986572, "learning_rate": 1e-06, "loss": -0.0726, "step": 1388 }, { "clip_ratio/high_max": 0.003019725263584405, "clip_ratio/high_mean": 0.0011345838793204166, "clip_ratio/low_mean": 0.0006780875637559802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018126713839592412, "epoch": 7.20997375328084, "grad_norm": 0.18081513047218323, "learning_rate": 1e-06, "loss": -0.0831, "step": 1389 }, { "clip_ratio/high_max": 0.0027243441800237633, "clip_ratio/high_mean": 0.0011600549551076256, "clip_ratio/low_mean": 0.0008558843055652687, "clip_ratio/low_min": 1.6212710761465132e-05, "clip_ratio/region_mean": 0.0020159392443019897, "epoch": 7.2146398366870805, "grad_norm": 0.18026256561279297, "learning_rate": 1e-06, "loss": -0.0514, "step": 1390 }, { "clip_ratio/high_max": 0.002497679044608958, "clip_ratio/high_mean": 0.0010388221890025306, "clip_ratio/low_mean": 0.0007422344824590255, "clip_ratio/low_min": 2.137117371603381e-05, "clip_ratio/region_mean": 0.001781056635081768, "epoch": 7.219305920093322, "grad_norm": 0.18348857760429382, "learning_rate": 1e-06, "loss": -0.0227, "step": 1391 }, { "clip_ratio/high_max": 0.002736147085670382, "clip_ratio/high_mean": 0.0010727581902756356, "clip_ratio/low_mean": 0.000830135806609178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019028940150747076, "epoch": 7.223972003499562, "grad_norm": 0.16022197902202606, "learning_rate": 1e-06, "loss": -0.0713, "step": 1392 }, { "clip_ratio/high_max": 0.005690870952093974, "clip_ratio/high_mean": 0.002291249758854974, "clip_ratio/low_mean": 0.0018303840442968067, "clip_ratio/low_min": 6.855576066300273e-05, "clip_ratio/region_mean": 0.004121633814065717, "epoch": 7.228638086905804, "grad_norm": 0.10240629315376282, "learning_rate": 1e-06, "loss": -0.032, "step": 1393 }, { "clip_ratio/high_max": 0.004066449255333282, "clip_ratio/high_mean": 0.0016682984787621535, "clip_ratio/low_mean": 0.0017844524718384491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034527509487816133, "epoch": 7.233304170312044, "grad_norm": 0.11568844318389893, "learning_rate": 1e-06, "loss": -0.0125, "step": 1394 }, { "clip_ratio/high_max": 0.004259329340129625, "clip_ratio/high_mean": 0.0019790427177213132, "clip_ratio/low_mean": 0.001971847035747487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003950889775296673, "epoch": 7.237970253718285, "grad_norm": 0.10941144078969955, "learning_rate": 1e-06, "loss": -0.0484, "step": 1395 }, { "clip_ratio/high_max": 0.005220945196924731, "clip_ratio/high_mean": 0.002107031708874274, "clip_ratio/low_mean": 0.001557528074044967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036645597574533895, "epoch": 7.242636337124526, "grad_norm": 0.10521205514669418, "learning_rate": 1e-06, "loss": -0.0357, "step": 1396 }, { "clip_ratio/high_max": 0.005285412407829426, "clip_ratio/high_mean": 0.001960551064257743, "clip_ratio/low_mean": 0.0016429114912170917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003603462566388771, "epoch": 7.247302420530767, "grad_norm": 0.2321300506591797, "learning_rate": 1e-06, "loss": -0.0766, "step": 1397 }, { "clip_ratio/high_max": 0.0041945805423893034, "clip_ratio/high_mean": 0.0018376325715507846, "clip_ratio/low_mean": 0.0019463643220660742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003783996871788986, "epoch": 7.251968503937007, "grad_norm": 0.12059333920478821, "learning_rate": 1e-06, "loss": -0.018, "step": 1398 }, { "clip_ratio/high_max": 0.005382211311371066, "clip_ratio/high_mean": 0.0021551075151364785, "clip_ratio/low_mean": 0.001576917595230043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003732025215867907, "epoch": 7.256634587343249, "grad_norm": 0.11205251514911652, "learning_rate": 1e-06, "loss": -0.0429, "step": 1399 }, { "clip_ratio/high_max": 0.0046652302698930725, "clip_ratio/high_mean": 0.0018036202418443281, "clip_ratio/low_mean": 0.001764358294167323, "clip_ratio/low_min": 6.834335363237187e-05, "clip_ratio/region_mean": 0.0035679785505635664, "epoch": 7.26130067074949, "grad_norm": 0.11788871884346008, "learning_rate": 1e-06, "loss": -0.033, "step": 1400 }, { "clip_ratio/high_max": 0.004217626075842418, "clip_ratio/high_mean": 0.0015111187949514715, "clip_ratio/low_mean": 0.0020126082345086616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035237269403296523, "epoch": 7.2659667541557305, "grad_norm": 0.10562445968389511, "learning_rate": 1e-06, "loss": -0.0009, "step": 1401 }, { "clip_ratio/high_max": 0.0043130563863087445, "clip_ratio/high_mean": 0.0017501031688880175, "clip_ratio/low_mean": 0.0015789838580531068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033290870196651667, "epoch": 7.270632837561972, "grad_norm": 0.10943048447370529, "learning_rate": 1e-06, "loss": -0.0223, "step": 1402 }, { "clip_ratio/high_max": 0.0045612145331688225, "clip_ratio/high_mean": 0.0020862354067503475, "clip_ratio/low_mean": 0.0016740255960030481, "clip_ratio/low_min": 6.851192301837727e-05, "clip_ratio/region_mean": 0.0037602610245812684, "epoch": 7.275298920968212, "grad_norm": 0.1013014167547226, "learning_rate": 1e-06, "loss": -0.046, "step": 1403 }, { "clip_ratio/high_max": 0.005731341414502822, "clip_ratio/high_mean": 0.002243163253297098, "clip_ratio/low_mean": 0.0015785994473844767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038217626279219985, "epoch": 7.2799650043744535, "grad_norm": 0.10732930898666382, "learning_rate": 1e-06, "loss": -0.0734, "step": 1404 }, { "clip_ratio/high_max": 0.005860163291799836, "clip_ratio/high_mean": 0.0021694279712392017, "clip_ratio/low_mean": 0.0015291275412891991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003698555432492867, "epoch": 7.284631087780694, "grad_norm": 0.1119331642985344, "learning_rate": 1e-06, "loss": -0.084, "step": 1405 }, { "clip_ratio/high_max": 0.00473516571946675, "clip_ratio/high_mean": 0.0020533799288386945, "clip_ratio/low_mean": 0.001929699592437828, "clip_ratio/low_min": 8.106355380732566e-05, "clip_ratio/region_mean": 0.003983079477620777, "epoch": 7.289297171186935, "grad_norm": 0.11101459711790085, "learning_rate": 1e-06, "loss": -0.0523, "step": 1406 }, { "clip_ratio/high_max": 0.004839467161218636, "clip_ratio/high_mean": 0.0018198908364865929, "clip_ratio/low_mean": 0.0017049133493856061, "clip_ratio/low_min": 5.105166565044783e-05, "clip_ratio/region_mean": 0.0035248041385784745, "epoch": 7.293963254593176, "grad_norm": 0.10066674649715424, "learning_rate": 1e-06, "loss": -0.0236, "step": 1407 }, { "clip_ratio/high_max": 0.004732530520414002, "clip_ratio/high_mean": 0.0018126526301784907, "clip_ratio/low_mean": 0.0016282599026453681, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00344091262013535, "epoch": 7.298629337999417, "grad_norm": 0.10134289413690567, "learning_rate": 1e-06, "loss": -0.072, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 836.7695922851562, "completions/mean_terminated_length": 534.6945190429688, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 7.303295421405657, "grad_norm": 0.1717565357685089, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 405258927.0, "reward": 0.6694336533546448, "reward_std": 0.15305370092391968, "rewards/simpleverify_reward/mean": 0.66943359375, "rewards/simpleverify_reward/std": 0.4704335033893585, "step": 1409 }, { "clip_ratio/high_max": 0.001910406703245826, "clip_ratio/high_mean": 0.0007401852672046516, "clip_ratio/low_mean": 0.00033430713938287226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010744924220489338, "epoch": 7.307961504811899, "grad_norm": 0.1613350659608841, "learning_rate": 1e-06, "loss": -0.0806, "step": 1410 }, { "clip_ratio/high_max": 0.002128610689396737, "clip_ratio/high_mean": 0.0007921959731902461, "clip_ratio/low_mean": 0.0004581749753924669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012503709513111971, "epoch": 7.312627588218139, "grad_norm": 0.2004249095916748, "learning_rate": 1e-06, "loss": -0.0193, "step": 1411 }, { "clip_ratio/high_max": 0.002432449735351838, "clip_ratio/high_mean": 0.0008601702575106174, "clip_ratio/low_mean": 0.0005078526764918934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013680229094461538, "epoch": 7.31729367162438, "grad_norm": 0.20410509407520294, "learning_rate": 1e-06, "loss": -0.0328, "step": 1412 }, { "clip_ratio/high_max": 0.0021632494463119656, "clip_ratio/high_mean": 0.000842975479827146, "clip_ratio/low_mean": 0.0005120429727867304, "clip_ratio/low_min": 1.560549389978405e-05, "clip_ratio/region_mean": 0.0013550184412451927, "epoch": 7.321959755030621, "grad_norm": 0.1816713511943817, "learning_rate": 1e-06, "loss": -0.0283, "step": 1413 }, { "clip_ratio/high_max": 0.002122090674674837, "clip_ratio/high_mean": 0.0007969508715177653, "clip_ratio/low_mean": 0.0005472665761772078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001344217460427899, "epoch": 7.326625838436862, "grad_norm": 0.18468163907527924, "learning_rate": 1e-06, "loss": -0.0135, "step": 1414 }, { "clip_ratio/high_max": 0.0022095729073043913, "clip_ratio/high_mean": 0.0009760061520864838, "clip_ratio/low_mean": 0.000473567757580895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001449573908757884, "epoch": 7.331291921843103, "grad_norm": 0.17401708662509918, "learning_rate": 1e-06, "loss": -0.0524, "step": 1415 }, { "clip_ratio/high_max": 0.0019754380937229143, "clip_ratio/high_mean": 0.0008297085032609175, "clip_ratio/low_mean": 0.0005366010082070716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013663095305673778, "epoch": 7.335958005249344, "grad_norm": 0.22758713364601135, "learning_rate": 1e-06, "loss": -0.034, "step": 1416 }, { "clip_ratio/high_max": 0.0021523628747672774, "clip_ratio/high_mean": 0.0008522993321093963, "clip_ratio/low_mean": 0.0006713470738759497, "clip_ratio/low_min": 3.659964932012372e-05, "clip_ratio/region_mean": 0.0015236463805194944, "epoch": 7.340624088655585, "grad_norm": 0.16731558740139008, "learning_rate": 1e-06, "loss": -0.0573, "step": 1417 }, { "clip_ratio/high_max": 0.00222210639185505, "clip_ratio/high_mean": 0.0009079246665351093, "clip_ratio/low_mean": 0.0005921211709392082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001500045840657549, "epoch": 7.3452901720618256, "grad_norm": 0.1762041300535202, "learning_rate": 1e-06, "loss": -0.0157, "step": 1418 }, { "clip_ratio/high_max": 0.0024547977081965655, "clip_ratio/high_mean": 0.0010494962698430754, "clip_ratio/low_mean": 0.0007614346050104359, "clip_ratio/low_min": 1.907814475998748e-05, "clip_ratio/region_mean": 0.001810930851206649, "epoch": 7.349956255468067, "grad_norm": 0.17196029424667358, "learning_rate": 1e-06, "loss": -0.0291, "step": 1419 }, { "clip_ratio/high_max": 0.0025236308138119057, "clip_ratio/high_mean": 0.0009809689472604077, "clip_ratio/low_mean": 0.0007225120289149345, "clip_ratio/low_min": 3.866378028760664e-05, "clip_ratio/region_mean": 0.001703480946162017, "epoch": 7.354622338874307, "grad_norm": 0.17653904855251312, "learning_rate": 1e-06, "loss": -0.0409, "step": 1420 }, { "clip_ratio/high_max": 0.0026429281570017338, "clip_ratio/high_mean": 0.001069426047251909, "clip_ratio/low_mean": 0.0007676737559449975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00183709980774438, "epoch": 7.359288422280549, "grad_norm": 0.18160517513751984, "learning_rate": 1e-06, "loss": -0.0753, "step": 1421 }, { "clip_ratio/high_max": 0.002748325336142443, "clip_ratio/high_mean": 0.0011163898634549696, "clip_ratio/low_mean": 0.0008297794315694773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019461693009361625, "epoch": 7.363954505686789, "grad_norm": 0.13417622447013855, "learning_rate": 1e-06, "loss": -0.0394, "step": 1422 }, { "clip_ratio/high_max": 0.002782740990369348, "clip_ratio/high_mean": 0.0010608749453240307, "clip_ratio/low_mean": 0.0007423377719533164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018032126827165484, "epoch": 7.36862058909303, "grad_norm": 0.19278652966022491, "learning_rate": 1e-06, "loss": -0.0573, "step": 1423 }, { "clip_ratio/high_max": 0.0024179421307053417, "clip_ratio/high_mean": 0.0009885039999062428, "clip_ratio/low_mean": 0.0007979971933309571, "clip_ratio/low_min": 2.891510484914761e-05, "clip_ratio/region_mean": 0.001786501205060631, "epoch": 7.373286672499271, "grad_norm": 0.1465693861246109, "learning_rate": 1e-06, "loss": -0.0383, "step": 1424 }, { "clip_ratio/high_max": 0.005212131407461129, "clip_ratio/high_mean": 0.002197588426497532, "clip_ratio/low_mean": 0.0015867582442297135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037843466852791607, "epoch": 7.377952755905512, "grad_norm": 0.09423007071018219, "learning_rate": 1e-06, "loss": -0.064, "step": 1425 }, { "clip_ratio/high_max": 0.004595384525600821, "clip_ratio/high_mean": 0.0018918486566690262, "clip_ratio/low_mean": 0.0012599759438671754, "clip_ratio/low_min": 1.4361213288793806e-05, "clip_ratio/region_mean": 0.003151824545057025, "epoch": 7.3826188393117524, "grad_norm": 0.09894728660583496, "learning_rate": 1e-06, "loss": -0.0813, "step": 1426 }, { "clip_ratio/high_max": 0.0055534994608024135, "clip_ratio/high_mean": 0.0020802060316782445, "clip_ratio/low_mean": 0.0017840438267739955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038642498911940493, "epoch": 7.387284922717994, "grad_norm": 0.10407675802707672, "learning_rate": 1e-06, "loss": -0.0202, "step": 1427 }, { "clip_ratio/high_max": 0.0059713546797866, "clip_ratio/high_mean": 0.0021649666523444466, "clip_ratio/low_mean": 0.0020009116851724684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0041658783302409574, "epoch": 7.391951006124234, "grad_norm": 0.12078286707401276, "learning_rate": 1e-06, "loss": -0.0338, "step": 1428 }, { "clip_ratio/high_max": 0.004623753986379597, "clip_ratio/high_mean": 0.001861581979028415, "clip_ratio/low_mean": 0.0018612840467540082, "clip_ratio/low_min": 3.12109877995681e-05, "clip_ratio/region_mean": 0.0037228660221444443, "epoch": 7.3966170895304755, "grad_norm": 0.11224246770143509, "learning_rate": 1e-06, "loss": -0.0292, "step": 1429 }, { "clip_ratio/high_max": 0.005018732277676463, "clip_ratio/high_mean": 0.002022135609877296, "clip_ratio/low_mean": 0.0019154238689225167, "clip_ratio/low_min": 6.55411058687605e-05, "clip_ratio/region_mean": 0.003937559537007473, "epoch": 7.401283172936716, "grad_norm": 0.11177490651607513, "learning_rate": 1e-06, "loss": -0.0145, "step": 1430 }, { "clip_ratio/high_max": 0.0052157543250359595, "clip_ratio/high_mean": 0.0021048767048341688, "clip_ratio/low_mean": 0.0016186998545890674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037235765485092998, "epoch": 7.405949256342957, "grad_norm": 0.1137535572052002, "learning_rate": 1e-06, "loss": -0.0533, "step": 1431 }, { "clip_ratio/high_max": 0.005313912908604834, "clip_ratio/high_mean": 0.002002594046643935, "clip_ratio/low_mean": 0.0016767497618275229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003679343812109437, "epoch": 7.410615339749198, "grad_norm": 0.11435745656490326, "learning_rate": 1e-06, "loss": -0.035, "step": 1432 }, { "clip_ratio/high_max": 0.005562998943787534, "clip_ratio/high_mean": 0.0019858788618876133, "clip_ratio/low_mean": 0.0016467466184622026, "clip_ratio/low_min": 7.319929864024743e-05, "clip_ratio/region_mean": 0.003632625564932823, "epoch": 7.415281423155439, "grad_norm": 0.11291353404521942, "learning_rate": 1e-06, "loss": -0.0581, "step": 1433 }, { "clip_ratio/high_max": 0.004402678270707838, "clip_ratio/high_mean": 0.0017808313277782872, "clip_ratio/low_mean": 0.0016999414292513393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003480772633338347, "epoch": 7.41994750656168, "grad_norm": 0.1485971361398697, "learning_rate": 1e-06, "loss": -0.0165, "step": 1434 }, { "clip_ratio/high_max": 0.004717145566246472, "clip_ratio/high_mean": 0.0019051778508583084, "clip_ratio/low_mean": 0.0019269497170171235, "clip_ratio/low_min": 5.344925739336759e-05, "clip_ratio/region_mean": 0.003832127636997029, "epoch": 7.424613589967921, "grad_norm": 0.1016089916229248, "learning_rate": 1e-06, "loss": -0.0299, "step": 1435 }, { "clip_ratio/high_max": 0.005057233145635109, "clip_ratio/high_mean": 0.0019317501974001061, "clip_ratio/low_mean": 0.0019208869780413806, "clip_ratio/low_min": 1.5085686754900962e-05, "clip_ratio/region_mean": 0.0038526371645275503, "epoch": 7.429279673374162, "grad_norm": 0.1129857674241066, "learning_rate": 1e-06, "loss": -0.042, "step": 1436 }, { "clip_ratio/high_max": 0.005249914574960712, "clip_ratio/high_mean": 0.002101309542922536, "clip_ratio/low_mean": 0.0016488298042531824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037501393526326865, "epoch": 7.433945756780402, "grad_norm": 0.1125134825706482, "learning_rate": 1e-06, "loss": -0.0762, "step": 1437 }, { "clip_ratio/high_max": 0.004501464907662012, "clip_ratio/high_mean": 0.0018051889055641368, "clip_ratio/low_mean": 0.0016548458297620527, "clip_ratio/low_min": 0.00010890343401115388, "clip_ratio/region_mean": 0.0034600348444655538, "epoch": 7.438611840186644, "grad_norm": 0.10212256759405136, "learning_rate": 1e-06, "loss": -0.04, "step": 1438 }, { "clip_ratio/high_max": 0.0056072988954838365, "clip_ratio/high_mean": 0.0020926353899994865, "clip_ratio/low_mean": 0.0017589937488082796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003851629138807766, "epoch": 7.443277923592884, "grad_norm": 0.10990682244300842, "learning_rate": 1e-06, "loss": -0.0582, "step": 1439 }, { "clip_ratio/high_max": 0.0038470451763714664, "clip_ratio/high_mean": 0.001627318106329767, "clip_ratio/low_mean": 0.0016522667692697723, "clip_ratio/low_min": 5.818012505187653e-05, "clip_ratio/region_mean": 0.0032795848819660023, "epoch": 7.447944006999125, "grad_norm": 0.10437417775392532, "learning_rate": 1e-06, "loss": -0.039, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0936802455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 889.7994995117188, "completions/mean_terminated_length": 558.3958129882812, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 7.452610090405366, "grad_norm": 0.19270630180835724, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 413934732.0, "reward": 0.65869140625, "reward_std": 0.1502629965543747, "rewards/simpleverify_reward/mean": 0.65869140625, "rewards/simpleverify_reward/std": 0.47416529059410095, "step": 1441 }, { "clip_ratio/high_max": 0.0018305212288396433, "clip_ratio/high_mean": 0.0007223765041999286, "clip_ratio/low_mean": 0.00042316487360949395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001145541391451843, "epoch": 7.457276173811607, "grad_norm": 0.15830476582050323, "learning_rate": 1e-06, "loss": -0.0616, "step": 1442 }, { "clip_ratio/high_max": 0.0017543510439281818, "clip_ratio/high_mean": 0.0007199926803878043, "clip_ratio/low_mean": 0.00048615474452162744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012061474117217585, "epoch": 7.4619422572178475, "grad_norm": 0.15458270907402039, "learning_rate": 1e-06, "loss": -0.0129, "step": 1443 }, { "clip_ratio/high_max": 0.002132483608875191, "clip_ratio/high_mean": 0.000872970713317045, "clip_ratio/low_mean": 0.00045244008833833504, "clip_ratio/low_min": 3.901373202097602e-05, "clip_ratio/region_mean": 0.0013254108162072953, "epoch": 7.466608340624089, "grad_norm": 0.25120580196380615, "learning_rate": 1e-06, "loss": -0.0481, "step": 1444 }, { "clip_ratio/high_max": 0.0016460290462418925, "clip_ratio/high_mean": 0.0007130425437935628, "clip_ratio/low_mean": 0.0005152259427632089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012282684638194041, "epoch": 7.471274424030329, "grad_norm": 0.15117956697940826, "learning_rate": 1e-06, "loss": -0.0018, "step": 1445 }, { "clip_ratio/high_max": 0.002032519470958505, "clip_ratio/high_mean": 0.0008346390459337272, "clip_ratio/low_mean": 0.00047760332381585613, "clip_ratio/low_min": 1.4484357052424457e-05, "clip_ratio/region_mean": 0.0013122423624736257, "epoch": 7.475940507436571, "grad_norm": 0.16944393515586853, "learning_rate": 1e-06, "loss": -0.0183, "step": 1446 }, { "clip_ratio/high_max": 0.00216623317101039, "clip_ratio/high_mean": 0.0008510964617016725, "clip_ratio/low_mean": 0.0006067982267268235, "clip_ratio/low_min": 2.5863851988106035e-05, "clip_ratio/region_mean": 0.0014578947011614218, "epoch": 7.480606590842811, "grad_norm": 0.2070213109254837, "learning_rate": 1e-06, "loss": -0.0629, "step": 1447 }, { "clip_ratio/high_max": 0.0022003844460414257, "clip_ratio/high_mean": 0.0008437011820205953, "clip_ratio/low_mean": 0.000610818784480216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014545199446729384, "epoch": 7.485272674249052, "grad_norm": 0.16186128556728363, "learning_rate": 1e-06, "loss": -0.0606, "step": 1448 }, { "clip_ratio/high_max": 0.0022420187924581114, "clip_ratio/high_mean": 0.0008143928571371362, "clip_ratio/low_mean": 0.0005092860565127921, "clip_ratio/low_min": 1.367016648146091e-05, "clip_ratio/region_mean": 0.0013236789345683064, "epoch": 7.489938757655293, "grad_norm": 0.18141378462314606, "learning_rate": 1e-06, "loss": -0.0621, "step": 1449 }, { "clip_ratio/high_max": 0.002240421952592442, "clip_ratio/high_mean": 0.0008935572950576898, "clip_ratio/low_mean": 0.0006871131508887629, "clip_ratio/low_min": 3.1871495593804866e-05, "clip_ratio/region_mean": 0.0015806704614078626, "epoch": 7.494604841061534, "grad_norm": 0.3602309226989746, "learning_rate": 1e-06, "loss": -0.0056, "step": 1450 }, { "clip_ratio/high_max": 0.0023575757222715765, "clip_ratio/high_mean": 0.0008182661877071951, "clip_ratio/low_mean": 0.0006290435480877932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014473097580776084, "epoch": 7.499270924467774, "grad_norm": 0.17450061440467834, "learning_rate": 1e-06, "loss": -0.0214, "step": 1451 }, { "clip_ratio/high_max": 0.002445107926178025, "clip_ratio/high_mean": 0.0009358380939374911, "clip_ratio/low_mean": 0.000705860047673923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001641698177991202, "epoch": 7.503937007874016, "grad_norm": 0.15424740314483643, "learning_rate": 1e-06, "loss": -0.0787, "step": 1452 }, { "clip_ratio/high_max": 0.00229677321476629, "clip_ratio/high_mean": 0.0009761892433743924, "clip_ratio/low_mean": 0.0005903874534851639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015665766877646092, "epoch": 7.508603091280257, "grad_norm": 0.15647411346435547, "learning_rate": 1e-06, "loss": -0.0829, "step": 1453 }, { "clip_ratio/high_max": 0.0025270449550589547, "clip_ratio/high_mean": 0.000984163023531437, "clip_ratio/low_mean": 0.0006354441438816139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00161960718469345, "epoch": 7.5132691746864975, "grad_norm": 0.7884874939918518, "learning_rate": 1e-06, "loss": -0.0567, "step": 1454 }, { "clip_ratio/high_max": 0.0021180681869736873, "clip_ratio/high_mean": 0.0008684027852723375, "clip_ratio/low_mean": 0.0008248721624113386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016932749676925596, "epoch": 7.517935258092739, "grad_norm": 0.18797625601291656, "learning_rate": 1e-06, "loss": -0.0367, "step": 1455 }, { "clip_ratio/high_max": 0.0024011031237023417, "clip_ratio/high_mean": 0.001046918996507884, "clip_ratio/low_mean": 0.0007792649557814002, "clip_ratio/low_min": 5.631966450891923e-05, "clip_ratio/region_mean": 0.0018261839577462524, "epoch": 7.522601341498979, "grad_norm": 0.183904767036438, "learning_rate": 1e-06, "loss": -0.0603, "step": 1456 }, { "clip_ratio/high_max": 0.005467097056680359, "clip_ratio/high_mean": 0.0022446903967647813, "clip_ratio/low_mean": 0.0017680774253676645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00401276774937287, "epoch": 7.5272674249052205, "grad_norm": 0.10196895897388458, "learning_rate": 1e-06, "loss": -0.0669, "step": 1457 }, { "clip_ratio/high_max": 0.004395519688841887, "clip_ratio/high_mean": 0.0017328943613392767, "clip_ratio/low_mean": 0.0015983417761162855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033312361483694986, "epoch": 7.531933508311461, "grad_norm": 0.11472579091787338, "learning_rate": 1e-06, "loss": -0.0623, "step": 1458 }, { "clip_ratio/high_max": 0.003650425722298678, "clip_ratio/high_mean": 0.0017068459255824564, "clip_ratio/low_mean": 0.001875266996648861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035821128549287096, "epoch": 7.536599591717702, "grad_norm": 0.1033015325665474, "learning_rate": 1e-06, "loss": -0.0137, "step": 1459 }, { "clip_ratio/high_max": 0.0046282951516332105, "clip_ratio/high_mean": 0.0018897248010034673, "clip_ratio/low_mean": 0.001587980423209956, "clip_ratio/low_min": 9.103204502025619e-05, "clip_ratio/region_mean": 0.0034777051987475716, "epoch": 7.541265675123943, "grad_norm": 0.11019978672266006, "learning_rate": 1e-06, "loss": -0.0488, "step": 1460 }, { "clip_ratio/high_max": 0.0038333841803250834, "clip_ratio/high_mean": 0.0015590045550197829, "clip_ratio/low_mean": 0.0017600630017113872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033190676476806402, "epoch": 7.545931758530184, "grad_norm": 0.10194586962461472, "learning_rate": 1e-06, "loss": -0.0025, "step": 1461 }, { "clip_ratio/high_max": 0.004471045824175235, "clip_ratio/high_mean": 0.001729344734485494, "clip_ratio/low_mean": 0.0015799501961737406, "clip_ratio/low_min": 8.690614049555734e-05, "clip_ratio/region_mean": 0.0033092949452111498, "epoch": 7.550597841936424, "grad_norm": 0.10734774172306061, "learning_rate": 1e-06, "loss": -0.0191, "step": 1462 }, { "clip_ratio/high_max": 0.00559540864196606, "clip_ratio/high_mean": 0.0020423575733730104, "clip_ratio/low_mean": 0.001704288773908047, "clip_ratio/low_min": 0.00011638733849395066, "clip_ratio/region_mean": 0.0037466463109012693, "epoch": 7.555263925342666, "grad_norm": 0.11830799281597137, "learning_rate": 1e-06, "loss": -0.0637, "step": 1463 }, { "clip_ratio/high_max": 0.004802870040293783, "clip_ratio/high_mean": 0.0018571846376289614, "clip_ratio/low_mean": 0.0015380685372292646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003395253195776604, "epoch": 7.559930008748906, "grad_norm": 0.10258246213197708, "learning_rate": 1e-06, "loss": -0.0612, "step": 1464 }, { "clip_ratio/high_max": 0.004036631864437368, "clip_ratio/high_mean": 0.0016082869442470837, "clip_ratio/low_mean": 0.0015946135245030746, "clip_ratio/low_min": 5.468066592584364e-05, "clip_ratio/region_mean": 0.0032029004796640947, "epoch": 7.564596092155147, "grad_norm": 0.10809513926506042, "learning_rate": 1e-06, "loss": -0.0628, "step": 1465 }, { "clip_ratio/high_max": 0.004903237408143468, "clip_ratio/high_mean": 0.0018993035228049848, "clip_ratio/low_mean": 0.0019947527835029177, "clip_ratio/low_min": 0.00018593157255963888, "clip_ratio/region_mean": 0.0038940563244977966, "epoch": 7.569262175561388, "grad_norm": 0.135222390294075, "learning_rate": 1e-06, "loss": -0.0065, "step": 1466 }, { "clip_ratio/high_max": 0.004870341552305035, "clip_ratio/high_mean": 0.0017010760784614831, "clip_ratio/low_mean": 0.001801039088604739, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003502115185256116, "epoch": 7.573928258967629, "grad_norm": 0.10393046587705612, "learning_rate": 1e-06, "loss": -0.0222, "step": 1467 }, { "clip_ratio/high_max": 0.00513146941375453, "clip_ratio/high_mean": 0.0019443690944171976, "clip_ratio/low_mean": 0.0016804766783025116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003624845776357688, "epoch": 7.57859434237387, "grad_norm": 0.11001542210578918, "learning_rate": 1e-06, "loss": -0.0794, "step": 1468 }, { "clip_ratio/high_max": 0.004238697511027567, "clip_ratio/high_mean": 0.0018393221835140139, "clip_ratio/low_mean": 0.0015272068740159739, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033665290102362633, "epoch": 7.583260425780111, "grad_norm": 0.11938067525625229, "learning_rate": 1e-06, "loss": -0.0836, "step": 1469 }, { "clip_ratio/high_max": 0.004776272995513864, "clip_ratio/high_mean": 0.001788714580470696, "clip_ratio/low_mean": 0.0019292604447400663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037179750652285293, "epoch": 7.587926509186351, "grad_norm": 0.11862795799970627, "learning_rate": 1e-06, "loss": -0.0576, "step": 1470 }, { "clip_ratio/high_max": 0.00465654103027191, "clip_ratio/high_mean": 0.0017225914998562075, "clip_ratio/low_mean": 0.0018139689054805785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035365603398531675, "epoch": 7.592592592592593, "grad_norm": 0.1361895054578781, "learning_rate": 1e-06, "loss": -0.0374, "step": 1471 }, { "clip_ratio/high_max": 0.004124632258026395, "clip_ratio/high_mean": 0.0018400444132566918, "clip_ratio/low_mean": 0.0017699192176223733, "clip_ratio/low_min": 0.00014766353706363589, "clip_ratio/region_mean": 0.0036099636199651286, "epoch": 7.597258675998834, "grad_norm": 0.11981182545423508, "learning_rate": 1e-06, "loss": -0.0611, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0891462053571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 854.3715209960938, "completions/mean_terminated_length": 537.1099853515625, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 7.601924759405074, "grad_norm": 0.20368652045726776, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 422382450.0, "reward": 0.6672014594078064, "reward_std": 0.14123475551605225, "rewards/simpleverify_reward/mean": 0.6672014594078064, "rewards/simpleverify_reward/std": 0.4712315499782562, "step": 1473 }, { "clip_ratio/high_max": 0.0019321077816130128, "clip_ratio/high_mean": 0.0007545481576016755, "clip_ratio/low_mean": 0.0003618924652073474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011164406496391166, "epoch": 7.606590842811316, "grad_norm": 0.162889301776886, "learning_rate": 1e-06, "loss": -0.0523, "step": 1474 }, { "clip_ratio/high_max": 0.00210403493110789, "clip_ratio/high_mean": 0.0008370339382963721, "clip_ratio/low_mean": 0.00038616514302702853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012231991240696516, "epoch": 7.611256926217556, "grad_norm": 0.1682155430316925, "learning_rate": 1e-06, "loss": -0.0286, "step": 1475 }, { "clip_ratio/high_max": 0.002167194747016765, "clip_ratio/high_mean": 0.0008144522307702573, "clip_ratio/low_mean": 0.0004282597008113953, "clip_ratio/low_min": 1.713267556624487e-05, "clip_ratio/region_mean": 0.001242711914528627, "epoch": 7.615923009623797, "grad_norm": 0.1678251177072525, "learning_rate": 1e-06, "loss": -0.0633, "step": 1476 }, { "clip_ratio/high_max": 0.0023542965172964614, "clip_ratio/high_mean": 0.0008686422679602401, "clip_ratio/low_mean": 0.00048729862601248897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013559409089793917, "epoch": 7.620589093030038, "grad_norm": 0.20742952823638916, "learning_rate": 1e-06, "loss": -0.0664, "step": 1477 }, { "clip_ratio/high_max": 0.0020516479780781083, "clip_ratio/high_mean": 0.0009298053355450975, "clip_ratio/low_mean": 0.00047200236349453917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014018077126820572, "epoch": 7.625255176436279, "grad_norm": 0.168976292014122, "learning_rate": 1e-06, "loss": -0.0611, "step": 1478 }, { "clip_ratio/high_max": 0.001802879025490256, "clip_ratio/high_mean": 0.0006674254759673204, "clip_ratio/low_mean": 0.0005177582534088288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00118518371891696, "epoch": 7.6299212598425195, "grad_norm": 0.1982998251914978, "learning_rate": 1e-06, "loss": -0.0097, "step": 1479 }, { "clip_ratio/high_max": 0.002118727847118862, "clip_ratio/high_mean": 0.000792240407463396, "clip_ratio/low_mean": 0.0004534460072136426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012456864060368389, "epoch": 7.634587343248761, "grad_norm": 0.14808295667171478, "learning_rate": 1e-06, "loss": -0.0327, "step": 1480 }, { "clip_ratio/high_max": 0.002626515328302048, "clip_ratio/high_mean": 0.0008949331077019451, "clip_ratio/low_mean": 0.0005744934651374933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014694265883008484, "epoch": 7.639253426655001, "grad_norm": 0.18225401639938354, "learning_rate": 1e-06, "loss": -0.0274, "step": 1481 }, { "clip_ratio/high_max": 0.0023778744362061843, "clip_ratio/high_mean": 0.0009791627198865172, "clip_ratio/low_mean": 0.000501624456319405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014807871957600582, "epoch": 7.6439195100612425, "grad_norm": 0.16968855261802673, "learning_rate": 1e-06, "loss": -0.0572, "step": 1482 }, { "clip_ratio/high_max": 0.0021586116636171937, "clip_ratio/high_mean": 0.0008928717506933026, "clip_ratio/low_mean": 0.000576390482365241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014692622353322804, "epoch": 7.648585593467483, "grad_norm": 0.18596337735652924, "learning_rate": 1e-06, "loss": -0.0606, "step": 1483 }, { "clip_ratio/high_max": 0.002512024228053633, "clip_ratio/high_mean": 0.0009815312405407894, "clip_ratio/low_mean": 0.0006831329665146768, "clip_ratio/low_min": 3.410641147638671e-05, "clip_ratio/region_mean": 0.0016646642143314239, "epoch": 7.653251676873724, "grad_norm": 0.21784955263137817, "learning_rate": 1e-06, "loss": -0.0123, "step": 1484 }, { "clip_ratio/high_max": 0.002348741385503672, "clip_ratio/high_mean": 0.0008950869196269196, "clip_ratio/low_mean": 0.0006804961440138868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015755830681882799, "epoch": 7.657917760279965, "grad_norm": 0.2041342705488205, "learning_rate": 1e-06, "loss": -0.0552, "step": 1485 }, { "clip_ratio/high_max": 0.002357837547606323, "clip_ratio/high_mean": 0.0009750519384397194, "clip_ratio/low_mean": 0.0007263015977514442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001701353583484888, "epoch": 7.662583843686206, "grad_norm": 0.2158804088830948, "learning_rate": 1e-06, "loss": -0.0309, "step": 1486 }, { "clip_ratio/high_max": 0.003039932722458616, "clip_ratio/high_mean": 0.0010895789946516743, "clip_ratio/low_mean": 0.0007536582807006198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018432372962706722, "epoch": 7.667249927092447, "grad_norm": 0.18324705958366394, "learning_rate": 1e-06, "loss": -0.0167, "step": 1487 }, { "clip_ratio/high_max": 0.002248109351057792, "clip_ratio/high_mean": 0.0009024063429023954, "clip_ratio/low_mean": 0.0008267970879387576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017292034681304358, "epoch": 7.671916010498688, "grad_norm": 0.2000238001346588, "learning_rate": 1e-06, "loss": -0.0186, "step": 1488 }, { "clip_ratio/high_max": 0.005637190231936984, "clip_ratio/high_mean": 0.0023293374470085837, "clip_ratio/low_mean": 0.0019038573627767619, "clip_ratio/low_min": 5.559509008890018e-05, "clip_ratio/region_mean": 0.004233194937114604, "epoch": 7.676582093904928, "grad_norm": 0.09963242709636688, "learning_rate": 1e-06, "loss": -0.0441, "step": 1489 }, { "clip_ratio/high_max": 0.00421796010050457, "clip_ratio/high_mean": 0.0016571489177295007, "clip_ratio/low_mean": 0.0014588923477276694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0031160412763711065, "epoch": 7.681248177311169, "grad_norm": 0.10052011162042618, "learning_rate": 1e-06, "loss": -0.053, "step": 1490 }, { "clip_ratio/high_max": 0.0038538493317901157, "clip_ratio/high_mean": 0.0017124608166341204, "clip_ratio/low_mean": 0.001361603297482361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030740640722797252, "epoch": 7.685914260717411, "grad_norm": 0.10273779928684235, "learning_rate": 1e-06, "loss": -0.0292, "step": 1491 }, { "clip_ratio/high_max": 0.004771810876263771, "clip_ratio/high_mean": 0.0019117758638458326, "clip_ratio/low_mean": 0.0014474881827482022, "clip_ratio/low_min": 6.952168769203126e-05, "clip_ratio/region_mean": 0.003359264104801696, "epoch": 7.690580344123651, "grad_norm": 0.10847639292478561, "learning_rate": 1e-06, "loss": -0.0641, "step": 1492 }, { "clip_ratio/high_max": 0.00603368591691833, "clip_ratio/high_mean": 0.0020579002193699125, "clip_ratio/low_mean": 0.0018534946320869494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003911394829628989, "epoch": 7.695246427529892, "grad_norm": 0.11956518888473511, "learning_rate": 1e-06, "loss": -0.0675, "step": 1493 }, { "clip_ratio/high_max": 0.003999712091172114, "clip_ratio/high_mean": 0.0017442529133404605, "clip_ratio/low_mean": 0.0016306047491525533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0033748576388461515, "epoch": 7.699912510936133, "grad_norm": 0.10121136903762817, "learning_rate": 1e-06, "loss": -0.0619, "step": 1494 }, { "clip_ratio/high_max": 0.004087908600922674, "clip_ratio/high_mean": 0.0013940994795120787, "clip_ratio/low_mean": 0.001587731054314645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029818304828950204, "epoch": 7.704578594342374, "grad_norm": 0.09256784617900848, "learning_rate": 1e-06, "loss": -0.0104, "step": 1495 }, { "clip_ratio/high_max": 0.00432162905781297, "clip_ratio/high_mean": 0.001621355659153778, "clip_ratio/low_mean": 0.001472005038522184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0030933607486076653, "epoch": 7.7092446777486145, "grad_norm": 0.09782762080430984, "learning_rate": 1e-06, "loss": -0.0333, "step": 1496 }, { "clip_ratio/high_max": 0.004736357841466088, "clip_ratio/high_mean": 0.0017792898579500616, "clip_ratio/low_mean": 0.0016899218298931373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0034692116896621883, "epoch": 7.713910761154856, "grad_norm": 0.10189592093229294, "learning_rate": 1e-06, "loss": -0.0282, "step": 1497 }, { "clip_ratio/high_max": 0.005149058604729362, "clip_ratio/high_mean": 0.0020658233697758988, "clip_ratio/low_mean": 0.0013104771314829122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003376300497620832, "epoch": 7.718576844561096, "grad_norm": 0.10006771236658096, "learning_rate": 1e-06, "loss": -0.0579, "step": 1498 }, { "clip_ratio/high_max": 0.004630413815903012, "clip_ratio/high_mean": 0.0018425955859129317, "clip_ratio/low_mean": 0.0015567682085020351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003399363878997974, "epoch": 7.723242927967338, "grad_norm": 0.09941881150007248, "learning_rate": 1e-06, "loss": -0.0614, "step": 1499 }, { "clip_ratio/high_max": 0.004295340389944613, "clip_ratio/high_mean": 0.0017776333370420616, "clip_ratio/low_mean": 0.0018908265737991314, "clip_ratio/low_min": 8.526603050995618e-05, "clip_ratio/region_mean": 0.0036684599763248116, "epoch": 7.727909011373578, "grad_norm": 0.11763487011194229, "learning_rate": 1e-06, "loss": -0.0134, "step": 1500 }, { "clip_ratio/high_max": 0.005803777487017214, "clip_ratio/high_mean": 0.001966384254046716, "clip_ratio/low_mean": 0.001671139914833475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036375242489157245, "epoch": 7.732575094779819, "grad_norm": 0.12014557421207428, "learning_rate": 1e-06, "loss": -0.0562, "step": 1501 }, { "clip_ratio/high_max": 0.0042572036472847685, "clip_ratio/high_mean": 0.001770846010913374, "clip_ratio/low_mean": 0.0018354734675085638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003606319412938319, "epoch": 7.73724117818606, "grad_norm": 0.10845109075307846, "learning_rate": 1e-06, "loss": -0.0318, "step": 1502 }, { "clip_ratio/high_max": 0.005232460724073462, "clip_ratio/high_mean": 0.0018847653409466147, "clip_ratio/low_mean": 0.0018729867369984277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003757752027013339, "epoch": 7.741907261592301, "grad_norm": 0.107645682990551, "learning_rate": 1e-06, "loss": -0.0176, "step": 1503 }, { "clip_ratio/high_max": 0.0044113863332313485, "clip_ratio/high_mean": 0.0016779145080363378, "clip_ratio/low_mean": 0.0019482728639559355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003626187266490888, "epoch": 7.746573344998541, "grad_norm": 0.11277163028717041, "learning_rate": 1e-06, "loss": -0.0197, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0836356026785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 823.0955810546875, "completions/mean_terminated_length": 524.3810424804688, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 7.751239428404783, "grad_norm": 0.18448418378829956, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 430715788.0, "reward": 0.6712472438812256, "reward_std": 0.13961604237556458, "rewards/simpleverify_reward/mean": 0.6712471842765808, "rewards/simpleverify_reward/std": 0.46977630257606506, "step": 1505 }, { "clip_ratio/high_max": 0.0016724604211049154, "clip_ratio/high_mean": 0.0006774661960662343, "clip_ratio/low_mean": 0.0004023361912004475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001079802394087892, "epoch": 7.755905511811024, "grad_norm": 0.19419632852077484, "learning_rate": 1e-06, "loss": -0.0247, "step": 1506 }, { "clip_ratio/high_max": 0.002159832467441447, "clip_ratio/high_mean": 0.0008151285328494851, "clip_ratio/low_mean": 0.00047424965123354923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012893781731690979, "epoch": 7.7605715952172645, "grad_norm": 0.19834081828594208, "learning_rate": 1e-06, "loss": -0.0706, "step": 1507 }, { "clip_ratio/high_max": 0.002334334538318217, "clip_ratio/high_mean": 0.000795241558989801, "clip_ratio/low_mean": 0.0005752848264819477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013705263681913493, "epoch": 7.765237678623506, "grad_norm": 0.20305268466472626, "learning_rate": 1e-06, "loss": 0.0017, "step": 1508 }, { "clip_ratio/high_max": 0.0021882962573727127, "clip_ratio/high_mean": 0.0008869058274285635, "clip_ratio/low_mean": 0.0005346108612229727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001421516699338099, "epoch": 7.769903762029746, "grad_norm": 0.17426122725009918, "learning_rate": 1e-06, "loss": -0.0906, "step": 1509 }, { "clip_ratio/high_max": 0.002131869288859889, "clip_ratio/high_mean": 0.0008643377805128694, "clip_ratio/low_mean": 0.0005741007180404267, "clip_ratio/low_min": 1.7448352082283236e-05, "clip_ratio/region_mean": 0.001438438455807045, "epoch": 7.7745698454359875, "grad_norm": 0.20878808200359344, "learning_rate": 1e-06, "loss": -0.0588, "step": 1510 }, { "clip_ratio/high_max": 0.001977486965188291, "clip_ratio/high_mean": 0.0007866337273298996, "clip_ratio/low_mean": 0.000651401966024423, "clip_ratio/low_min": 1.414347116224235e-05, "clip_ratio/region_mean": 0.001438035658793524, "epoch": 7.779235928842228, "grad_norm": 0.1959061324596405, "learning_rate": 1e-06, "loss": 0.01, "step": 1511 }, { "clip_ratio/high_max": 0.002065342774585588, "clip_ratio/high_mean": 0.000752827079850249, "clip_ratio/low_mean": 0.0005871303965250263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013399574636423495, "epoch": 7.783902012248469, "grad_norm": 0.17067955434322357, "learning_rate": 1e-06, "loss": -0.0212, "step": 1512 }, { "clip_ratio/high_max": 0.002073963805742096, "clip_ratio/high_mean": 0.0007970221668074373, "clip_ratio/low_mean": 0.0006472511922765989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014442733954638243, "epoch": 7.78856809565471, "grad_norm": 0.1597839593887329, "learning_rate": 1e-06, "loss": -0.0176, "step": 1513 }, { "clip_ratio/high_max": 0.0021184734687267337, "clip_ratio/high_mean": 0.0008468701234960463, "clip_ratio/low_mean": 0.0006247014016480534, "clip_ratio/low_min": 1.6543144738534465e-05, "clip_ratio/region_mean": 0.001471571515139658, "epoch": 7.793234179060951, "grad_norm": 0.1552352011203766, "learning_rate": 1e-06, "loss": -0.0401, "step": 1514 }, { "clip_ratio/high_max": 0.001943274319273769, "clip_ratio/high_mean": 0.0007048133215903363, "clip_ratio/low_mean": 0.0006485898538812762, "clip_ratio/low_min": 3.170631953253178e-05, "clip_ratio/region_mean": 0.001353403187749791, "epoch": 7.797900262467191, "grad_norm": 0.1676725596189499, "learning_rate": 1e-06, "loss": 0.016, "step": 1515 }, { "clip_ratio/high_max": 0.002463613374857232, "clip_ratio/high_mean": 0.0009564288939145627, "clip_ratio/low_mean": 0.0007616740522280452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017181029325001873, "epoch": 7.802566345873433, "grad_norm": 0.1631651669740677, "learning_rate": 1e-06, "loss": -0.0618, "step": 1516 }, { "clip_ratio/high_max": 0.0024549139125156216, "clip_ratio/high_mean": 0.0010258428337692749, "clip_ratio/low_mean": 0.0007004829876677832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017263258268940262, "epoch": 7.807232429279673, "grad_norm": 0.24222761392593384, "learning_rate": 1e-06, "loss": -0.0638, "step": 1517 }, { "clip_ratio/high_max": 0.0028086554957553744, "clip_ratio/high_mean": 0.0011056260191253386, "clip_ratio/low_mean": 0.0006971920656724251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018028180493274704, "epoch": 7.811898512685914, "grad_norm": 0.16327685117721558, "learning_rate": 1e-06, "loss": -0.0739, "step": 1518 }, { "clip_ratio/high_max": 0.0028611213856493123, "clip_ratio/high_mean": 0.000996094074253051, "clip_ratio/low_mean": 0.0008683631622261601, "clip_ratio/low_min": 1.674256600381341e-05, "clip_ratio/region_mean": 0.0018644572010089178, "epoch": 7.816564596092155, "grad_norm": 0.17864589393138885, "learning_rate": 1e-06, "loss": -0.0213, "step": 1519 }, { "clip_ratio/high_max": 0.00227695504509029, "clip_ratio/high_mean": 0.0010404180320620071, "clip_ratio/low_mean": 0.000932800197006145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019732182008738164, "epoch": 7.821230679498396, "grad_norm": 0.16785147786140442, "learning_rate": 1e-06, "loss": -0.0632, "step": 1520 }, { "clip_ratio/high_max": 0.005287787120323628, "clip_ratio/high_mean": 0.0020277066905691754, "clip_ratio/low_mean": 0.0016498571385454852, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036775638873223215, "epoch": 7.8258967629046365, "grad_norm": 0.09306744486093521, "learning_rate": 1e-06, "loss": -0.033, "step": 1521 }, { "clip_ratio/high_max": 0.004476631336729042, "clip_ratio/high_mean": 0.0017428647479391657, "clip_ratio/low_mean": 0.001546897921798518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032897626952035353, "epoch": 7.830562846310878, "grad_norm": 0.11428961157798767, "learning_rate": 1e-06, "loss": -0.0255, "step": 1522 }, { "clip_ratio/high_max": 0.005534860538318753, "clip_ratio/high_mean": 0.0020855005423072726, "clip_ratio/low_mean": 0.0016807865795271937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003766287147300318, "epoch": 7.835228929717118, "grad_norm": 0.13048382103443146, "learning_rate": 1e-06, "loss": -0.0717, "step": 1523 }, { "clip_ratio/high_max": 0.005165198905160651, "clip_ratio/high_mean": 0.0018588128732517362, "clip_ratio/low_mean": 0.0020974801445845515, "clip_ratio/low_min": 9.300595411332324e-05, "clip_ratio/region_mean": 0.003956292959628627, "epoch": 7.83989501312336, "grad_norm": 0.12358823418617249, "learning_rate": 1e-06, "loss": 0.0008, "step": 1524 }, { "clip_ratio/high_max": 0.004943988133163657, "clip_ratio/high_mean": 0.0020040498311573174, "clip_ratio/low_mean": 0.0015913580173219088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003595407833927311, "epoch": 7.844561096529601, "grad_norm": 0.11490639299154282, "learning_rate": 1e-06, "loss": -0.0915, "step": 1525 }, { "clip_ratio/high_max": 0.005245728782028891, "clip_ratio/high_mean": 0.0020393589438754134, "clip_ratio/low_mean": 0.001678700245975051, "clip_ratio/low_min": 8.982466533780098e-05, "clip_ratio/region_mean": 0.0037180591316428035, "epoch": 7.849227179935841, "grad_norm": 0.13696928322315216, "learning_rate": 1e-06, "loss": -0.0598, "step": 1526 }, { "clip_ratio/high_max": 0.0034813763631973416, "clip_ratio/high_mean": 0.0015270351323124487, "clip_ratio/low_mean": 0.0020417072737473063, "clip_ratio/low_min": 0.00010003653369494714, "clip_ratio/region_mean": 0.00356874248245731, "epoch": 7.853893263342083, "grad_norm": 0.10824873298406601, "learning_rate": 1e-06, "loss": 0.0092, "step": 1527 }, { "clip_ratio/high_max": 0.00502075794065604, "clip_ratio/high_mean": 0.001747989321302157, "clip_ratio/low_mean": 0.0018746527634903032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00362264207797125, "epoch": 7.858559346748323, "grad_norm": 0.10474837571382523, "learning_rate": 1e-06, "loss": -0.0221, "step": 1528 }, { "clip_ratio/high_max": 0.0041661158902570605, "clip_ratio/high_mean": 0.0016701023087080102, "clip_ratio/low_mean": 0.001712045577733079, "clip_ratio/low_min": 5.383290408644825e-05, "clip_ratio/region_mean": 0.003382147966476623, "epoch": 7.863225430154564, "grad_norm": 0.12958404421806335, "learning_rate": 1e-06, "loss": -0.0182, "step": 1529 }, { "clip_ratio/high_max": 0.004341957544966135, "clip_ratio/high_mean": 0.0017415515503671486, "clip_ratio/low_mean": 0.0016735713061279966, "clip_ratio/low_min": 6.16735705989413e-05, "clip_ratio/region_mean": 0.0034151228464907035, "epoch": 7.867891513560805, "grad_norm": 0.1064004898071289, "learning_rate": 1e-06, "loss": -0.0408, "step": 1530 }, { "clip_ratio/high_max": 0.004208986523735803, "clip_ratio/high_mean": 0.0014789962715440197, "clip_ratio/low_mean": 0.0017891750176204368, "clip_ratio/low_min": 5.764903289673384e-05, "clip_ratio/region_mean": 0.0032681712473277003, "epoch": 7.872557596967046, "grad_norm": 0.10802465677261353, "learning_rate": 1e-06, "loss": 0.0152, "step": 1531 }, { "clip_ratio/high_max": 0.004379143705591559, "clip_ratio/high_mean": 0.0018261924305988941, "clip_ratio/low_mean": 0.0016290333787765121, "clip_ratio/low_min": 3.2688283681636676e-05, "clip_ratio/region_mean": 0.003455225822108332, "epoch": 7.8772236803732865, "grad_norm": 0.1274808943271637, "learning_rate": 1e-06, "loss": -0.0625, "step": 1532 }, { "clip_ratio/high_max": 0.00491440951009281, "clip_ratio/high_mean": 0.0020928465128235985, "clip_ratio/low_mean": 0.00162763443950098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0037204809195827693, "epoch": 7.881889763779528, "grad_norm": 0.12007849663496017, "learning_rate": 1e-06, "loss": -0.0649, "step": 1533 }, { "clip_ratio/high_max": 0.005424270071671344, "clip_ratio/high_mean": 0.002143973426427692, "clip_ratio/low_mean": 0.0017188498586619971, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0038628233887720853, "epoch": 7.886555847185768, "grad_norm": 0.11042458564043045, "learning_rate": 1e-06, "loss": -0.0748, "step": 1534 }, { "clip_ratio/high_max": 0.004832319049455691, "clip_ratio/high_mean": 0.0018328824280615663, "clip_ratio/low_mean": 0.002159742532967357, "clip_ratio/low_min": 0.00013025767111685127, "clip_ratio/region_mean": 0.003992625017417595, "epoch": 7.8912219305920095, "grad_norm": 0.11553628742694855, "learning_rate": 1e-06, "loss": -0.0223, "step": 1535 }, { "clip_ratio/high_max": 0.004251797858159989, "clip_ratio/high_mean": 0.0017704329166008392, "clip_ratio/low_mean": 0.001913725460326532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0036841584151261486, "epoch": 7.89588801399825, "grad_norm": 0.11286855489015579, "learning_rate": 1e-06, "loss": -0.064, "step": 1536 }, { "epoch": 7.89588801399825, "step": 1536, "total_flos": 0.0, "train_loss": -0.017879978650705047, "train_runtime": 58472.4999, "train_samples_per_second": 24.518, "train_steps_per_second": 0.027 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 430715788, "num_train_epochs": 8, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }