{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14933296000093332, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013706752232142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 601.608154296875, "completions/mean_terminated_length": 553.045654296875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.1617286503314972, "learning_rate": 1e-06, "loss": 0.0659, "num_tokens": 80419305.0, "reward": 0.4935739040374756, "reward_std": 0.26311954855918884, "rewards/simpleverify_reward/mean": 0.4935738742351532, "rewards/simpleverify_reward/std": 0.49996092915534973, "step": 1 }, { "clip_ratio/high_max": 0.0021018415718572214, "clip_ratio/high_mean": 0.0008761105800658697, "clip_ratio/low_mean": 0.0006582708429050399, "clip_ratio/low_min": 3.986312549386639e-05, "clip_ratio/region_mean": 0.0015343814302468672, "epoch": 0.00018666620000116666, "grad_norm": 0.13120125234127045, "learning_rate": 1e-06, "loss": 0.0166, "step": 2 }, { "clip_ratio/high_max": 0.0025242570336558856, "clip_ratio/high_mean": 0.0010744326500571333, "clip_ratio/low_mean": 0.000577790844545234, "clip_ratio/low_min": 5.2952727401134325e-05, "clip_ratio/region_mean": 0.001652223498240346, "epoch": 0.00027999930000175, "grad_norm": 0.13642381131649017, "learning_rate": 1e-06, "loss": -0.0195, "step": 3 }, { "clip_ratio/high_max": 0.0025384860928170383, "clip_ratio/high_mean": 0.0010642884444678202, "clip_ratio/low_mean": 0.0007844070605642628, "clip_ratio/low_min": 6.364297132677166e-05, "clip_ratio/region_mean": 0.0018486955013941042, "epoch": 0.0003733324000023333, "grad_norm": 0.13279423117637634, "learning_rate": 1e-06, "loss": 0.029, "step": 4 }, { "clip_ratio/high_max": 0.002351232928049285, "clip_ratio/high_mean": 0.0009795452569960617, "clip_ratio/low_mean": 0.000770336386267445, "clip_ratio/low_min": 7.805315362929832e-05, "clip_ratio/region_mean": 0.0017498816378065385, "epoch": 0.0004666655000029167, "grad_norm": 0.1236974447965622, "learning_rate": 1e-06, "loss": 0.0171, "step": 5 }, { "clip_ratio/high_max": 0.00210517428786261, "clip_ratio/high_mean": 0.0010023075574281393, "clip_ratio/low_mean": 0.0011638228534138761, "clip_ratio/low_min": 0.0002580384380053147, "clip_ratio/region_mean": 0.002166130449040793, "epoch": 0.0005599986000035, "grad_norm": 0.12330015003681183, "learning_rate": 1e-06, "loss": 0.0276, "step": 6 }, { "clip_ratio/high_max": 0.002245760704681743, "clip_ratio/high_mean": 0.0010316514126316179, "clip_ratio/low_mean": 0.001184837798064109, "clip_ratio/low_min": 0.00031664362677474855, "clip_ratio/region_mean": 0.002216489185229875, "epoch": 0.0006533317000040833, "grad_norm": 0.13061660528182983, "learning_rate": 1e-06, "loss": 0.0615, "step": 7 }, { "clip_ratio/high_max": 0.002292564146046061, "clip_ratio/high_mean": 0.0010635452599672135, "clip_ratio/low_mean": 0.0011019938629033277, "clip_ratio/low_min": 0.000150440389461437, "clip_ratio/region_mean": 0.0021655391246895306, "epoch": 0.0007466648000046666, "grad_norm": 0.12682916224002838, "learning_rate": 1e-06, "loss": 0.0411, "step": 8 }, { "clip_ratio/high_max": 0.002415546106931288, "clip_ratio/high_mean": 0.0010379563664173475, "clip_ratio/low_mean": 0.0010673627039068379, "clip_ratio/low_min": 0.0001500330990893417, "clip_ratio/region_mean": 0.002105319101247005, "epoch": 0.00083999790000525, "grad_norm": 0.12642963230609894, "learning_rate": 1e-06, "loss": 0.0212, "step": 9 }, { "clip_ratio/high_max": 0.0022386027485481463, "clip_ratio/high_mean": 0.0010087977134389803, "clip_ratio/low_mean": 0.001090410052711377, "clip_ratio/low_min": 0.0001997013214349863, "clip_ratio/region_mean": 0.0020992077188566327, "epoch": 0.0009333310000058334, "grad_norm": 0.12079668790102005, "learning_rate": 1e-06, "loss": 0.0252, "step": 10 }, { "clip_ratio/high_max": 0.0022823772014817223, "clip_ratio/high_mean": 0.0010938989689748269, "clip_ratio/low_mean": 0.001060018928910722, "clip_ratio/low_min": 0.00016260559914371697, "clip_ratio/region_mean": 0.002153917906980496, "epoch": 0.0010266641000064166, "grad_norm": 0.13344547152519226, "learning_rate": 1e-06, "loss": -0.0337, "step": 11 }, { "clip_ratio/high_max": 0.0023792159336153418, "clip_ratio/high_mean": 0.0011063774581998587, "clip_ratio/low_mean": 0.001029559462040197, "clip_ratio/low_min": 0.0001911046219902346, "clip_ratio/region_mean": 0.0021359368911362253, "epoch": 0.001119997200007, "grad_norm": 0.12550796568393707, "learning_rate": 1e-06, "loss": 0.0236, "step": 12 }, { "clip_ratio/high_max": 0.0021791565377498046, "clip_ratio/high_mean": 0.0010023262584581971, "clip_ratio/low_mean": 0.001116699004342081, "clip_ratio/low_min": 0.00015036796776257688, "clip_ratio/region_mean": 0.0021190252009546384, "epoch": 0.0012133303000075833, "grad_norm": 0.10936053842306137, "learning_rate": 1e-06, "loss": 0.0252, "step": 13 }, { "clip_ratio/high_max": 0.002366712418734096, "clip_ratio/high_mean": 0.0010922391484200489, "clip_ratio/low_mean": 0.0010556360975897405, "clip_ratio/low_min": 0.00016136068188643549, "clip_ratio/region_mean": 0.0021478752678376622, "epoch": 0.0013066634000081666, "grad_norm": 0.11846575140953064, "learning_rate": 1e-06, "loss": -0.0137, "step": 14 }, { "clip_ratio/high_max": 0.002180356503231451, "clip_ratio/high_mean": 0.0011556230310816318, "clip_ratio/low_mean": 0.0012274282853468321, "clip_ratio/low_min": 0.00022236318000068422, "clip_ratio/region_mean": 0.0023830512654967606, "epoch": 0.00139999650000875, "grad_norm": 0.13327762484550476, "learning_rate": 1e-06, "loss": -0.005, "step": 15 }, { "clip_ratio/high_max": 0.0025269318866776302, "clip_ratio/high_mean": 0.0009510802665317897, "clip_ratio/low_mean": 0.0011573204064916354, "clip_ratio/low_min": 0.0001506609914940782, "clip_ratio/region_mean": 0.0021084006220917217, "epoch": 0.0014933296000093333, "grad_norm": 0.1190929263830185, "learning_rate": 1e-06, "loss": 0.061, "step": 16 }, { "clip_ratio/high_max": 0.0024306618215632625, "clip_ratio/high_mean": 0.0009805159934330732, "clip_ratio/low_mean": 0.0010411641524115112, "clip_ratio/low_min": 6.725791308781481e-05, "clip_ratio/region_mean": 0.0020216801203787327, "epoch": 0.0015866627000099165, "grad_norm": 0.12228909134864807, "learning_rate": 1e-06, "loss": 0.0356, "step": 17 }, { "clip_ratio/high_max": 0.002325063502212288, "clip_ratio/high_mean": 0.0010392249423603062, "clip_ratio/low_mean": 0.0011159386403960525, "clip_ratio/low_min": 0.000262607077274879, "clip_ratio/region_mean": 0.002155163587303832, "epoch": 0.0016799958000105, "grad_norm": 0.29469606280326843, "learning_rate": 1e-06, "loss": 0.0383, "step": 18 }, { "clip_ratio/high_max": 0.0020973205209884327, "clip_ratio/high_mean": 0.0009110002392844763, "clip_ratio/low_mean": 0.00112293640995631, "clip_ratio/low_min": 0.0002131746878148988, "clip_ratio/region_mean": 0.0020339366019470617, "epoch": 0.0017733289000110833, "grad_norm": 0.11302761733531952, "learning_rate": 1e-06, "loss": 0.0502, "step": 19 }, { "clip_ratio/high_max": 0.002688774446141906, "clip_ratio/high_mean": 0.0012194520895718597, "clip_ratio/low_mean": 0.0012293192157812882, "clip_ratio/low_min": 0.00025207292401319137, "clip_ratio/region_mean": 0.002448771381750703, "epoch": 0.0018666620000116667, "grad_norm": 0.1296168863773346, "learning_rate": 1e-06, "loss": 0.0095, "step": 20 }, { "clip_ratio/high_max": 0.0024523598622181453, "clip_ratio/high_mean": 0.0010830594583239872, "clip_ratio/low_mean": 0.0011122717642138014, "clip_ratio/low_min": 8.203778361348668e-05, "clip_ratio/region_mean": 0.0021953312098048627, "epoch": 0.00195999510001225, "grad_norm": 0.11740437150001526, "learning_rate": 1e-06, "loss": 0.037, "step": 21 }, { "clip_ratio/high_max": 0.002164478093618527, "clip_ratio/high_mean": 0.0010168895387323573, "clip_ratio/low_mean": 0.001135280243033776, "clip_ratio/low_min": 0.00018341509257879807, "clip_ratio/region_mean": 0.00215216979268007, "epoch": 0.002053328200012833, "grad_norm": 0.12829430401325226, "learning_rate": 1e-06, "loss": 0.0506, "step": 22 }, { "clip_ratio/high_max": 0.002617655773065053, "clip_ratio/high_mean": 0.0011648389627225697, "clip_ratio/low_mean": 0.001181104267743649, "clip_ratio/low_min": 0.00019392528702155687, "clip_ratio/region_mean": 0.0023459432486561127, "epoch": 0.0021466613000134167, "grad_norm": 0.13261590898036957, "learning_rate": 1e-06, "loss": -0.0158, "step": 23 }, { "clip_ratio/high_max": 0.002484638462192379, "clip_ratio/high_mean": 0.0011100026458734646, "clip_ratio/low_mean": 0.0011723230418283492, "clip_ratio/low_min": 0.00020529584980977233, "clip_ratio/region_mean": 0.0022823256804258563, "epoch": 0.002239994400014, "grad_norm": 0.12374509125947952, "learning_rate": 1e-06, "loss": -0.0211, "step": 24 }, { "clip_ratio/high_max": 0.002413399124634452, "clip_ratio/high_mean": 0.0010723094710556325, "clip_ratio/low_mean": 0.0010932834193226881, "clip_ratio/low_min": 0.0001882707147160545, "clip_ratio/region_mean": 0.0021655928358086385, "epoch": 0.002333327500014583, "grad_norm": 0.12279490381479263, "learning_rate": 1e-06, "loss": 0.0228, "step": 25 }, { "clip_ratio/high_max": 0.0025048256866284646, "clip_ratio/high_mean": 0.0011354289417795371, "clip_ratio/low_mean": 0.001106794576116954, "clip_ratio/low_min": 0.00020337846490292577, "clip_ratio/region_mean": 0.0022422235415433533, "epoch": 0.0024266606000151666, "grad_norm": 0.11415532231330872, "learning_rate": 1e-06, "loss": -0.034, "step": 26 }, { "clip_ratio/high_max": 0.0025189682783093303, "clip_ratio/high_mean": 0.0010854579959413968, "clip_ratio/low_mean": 0.0011868625442730263, "clip_ratio/low_min": 0.00022448066738434136, "clip_ratio/region_mean": 0.0022723205474903807, "epoch": 0.00251999370001575, "grad_norm": 0.12822604179382324, "learning_rate": 1e-06, "loss": -0.0113, "step": 27 }, { "clip_ratio/high_max": 0.0028029351378791034, "clip_ratio/high_mean": 0.0012144052852818277, "clip_ratio/low_mean": 0.00119122615615197, "clip_ratio/low_min": 9.276076889364049e-05, "clip_ratio/region_mean": 0.0024056313632172532, "epoch": 0.002613326800016333, "grad_norm": 0.13119523227214813, "learning_rate": 1e-06, "loss": -0.0247, "step": 28 }, { "clip_ratio/high_max": 0.0023764651850797236, "clip_ratio/high_mean": 0.0010464549268363044, "clip_ratio/low_mean": 0.0011260242499702144, "clip_ratio/low_min": 0.00011438582987466361, "clip_ratio/region_mean": 0.002172479151340667, "epoch": 0.0027066599000169166, "grad_norm": 0.12753500044345856, "learning_rate": 1e-06, "loss": 0.0274, "step": 29 }, { "clip_ratio/high_max": 0.002228257624665275, "clip_ratio/high_mean": 0.000980409078692901, "clip_ratio/low_mean": 0.0010730762915045489, "clip_ratio/low_min": 0.00014890969669068, "clip_ratio/region_mean": 0.0020534853829303756, "epoch": 0.0027999930000175, "grad_norm": 0.11440373957157135, "learning_rate": 1e-06, "loss": 0.0488, "step": 30 }, { "clip_ratio/high_max": 0.0028200717933941633, "clip_ratio/high_mean": 0.0012030283360218164, "clip_ratio/low_mean": 0.0012063452377333306, "clip_ratio/low_min": 0.00022382396673492622, "clip_ratio/region_mean": 0.002409373570117168, "epoch": 0.002893326100018083, "grad_norm": 0.12738914787769318, "learning_rate": 1e-06, "loss": 0.0389, "step": 31 }, { "clip_ratio/high_max": 0.0023557710701425094, "clip_ratio/high_mean": 0.0011099687735622865, "clip_ratio/low_mean": 0.0012141624465584755, "clip_ratio/low_min": 0.0001383327376061061, "clip_ratio/region_mean": 0.0023241312301252037, "epoch": 0.0029866592000186666, "grad_norm": 0.1252320408821106, "learning_rate": 1e-06, "loss": 0.0197, "step": 32 }, { "clip_ratio/high_max": 0.002819161250954494, "clip_ratio/high_mean": 0.0012134340831835289, "clip_ratio/low_mean": 0.001090383291739272, "clip_ratio/low_min": 0.00010541199026192771, "clip_ratio/region_mean": 0.002303817331267055, "epoch": 0.00307999230001925, "grad_norm": 0.1348554641008377, "learning_rate": 1e-06, "loss": -0.0516, "step": 33 }, { "clip_ratio/high_max": 0.002043310392764397, "clip_ratio/high_mean": 0.000896439072676003, "clip_ratio/low_mean": 0.0010872985585592687, "clip_ratio/low_min": 0.00016304017844959162, "clip_ratio/region_mean": 0.0019837376166833565, "epoch": 0.003173325400019833, "grad_norm": 0.1039976105093956, "learning_rate": 1e-06, "loss": 0.0273, "step": 34 }, { "clip_ratio/high_max": 0.0020466843379836064, "clip_ratio/high_mean": 0.0009615424060029909, "clip_ratio/low_mean": 0.0010886559557548026, "clip_ratio/low_min": 0.0002417429041088326, "clip_ratio/region_mean": 0.0020501983308349736, "epoch": 0.0032666585000204165, "grad_norm": 0.12059943377971649, "learning_rate": 1e-06, "loss": -0.0128, "step": 35 }, { "clip_ratio/high_max": 0.002565672344644554, "clip_ratio/high_mean": 0.001127321447711438, "clip_ratio/low_mean": 0.0012912679703731555, "clip_ratio/low_min": 0.0002091407413900015, "clip_ratio/region_mean": 0.0024185894362744875, "epoch": 0.003359991600021, "grad_norm": 0.12734465301036835, "learning_rate": 1e-06, "loss": 0.0197, "step": 36 }, { "clip_ratio/high_max": 0.002476813635439612, "clip_ratio/high_mean": 0.0011007639332092367, "clip_ratio/low_mean": 0.00113988334487658, "clip_ratio/low_min": 0.00017671908517513657, "clip_ratio/region_mean": 0.0022406472635339014, "epoch": 0.0034533247000215835, "grad_norm": 0.12069344520568848, "learning_rate": 1e-06, "loss": -0.0171, "step": 37 }, { "clip_ratio/high_max": 0.002689410546736326, "clip_ratio/high_mean": 0.001120532033382915, "clip_ratio/low_mean": 0.0012245463603903772, "clip_ratio/low_min": 0.0001022342685246258, "clip_ratio/region_mean": 0.002345078391954303, "epoch": 0.0035466578000221665, "grad_norm": 0.12638184428215027, "learning_rate": 1e-06, "loss": 0.0294, "step": 38 }, { "clip_ratio/high_max": 0.0025230765459127724, "clip_ratio/high_mean": 0.0010956735313811805, "clip_ratio/low_mean": 0.0011703356758516748, "clip_ratio/low_min": 0.00012289282949495828, "clip_ratio/region_mean": 0.002266009214508813, "epoch": 0.00363999090002275, "grad_norm": 0.1270855963230133, "learning_rate": 1e-06, "loss": 0.0253, "step": 39 }, { "clip_ratio/high_max": 0.0021315785343176685, "clip_ratio/high_mean": 0.0009402183604834136, "clip_ratio/low_mean": 0.0013044903535046615, "clip_ratio/low_min": 0.0002609282209959929, "clip_ratio/region_mean": 0.0022447087758337148, "epoch": 0.0037333240000233334, "grad_norm": 0.12138303369283676, "learning_rate": 1e-06, "loss": 0.0693, "step": 40 }, { "clip_ratio/high_max": 0.0025183569159707986, "clip_ratio/high_mean": 0.0010974137749144575, "clip_ratio/low_mean": 0.0011443735093052965, "clip_ratio/low_min": 8.779424115346046e-05, "clip_ratio/region_mean": 0.002241787311504595, "epoch": 0.0038266571000239165, "grad_norm": 0.11889399588108063, "learning_rate": 1e-06, "loss": 0.0092, "step": 41 }, { "clip_ratio/high_max": 0.002495914122846443, "clip_ratio/high_mean": 0.0011805417052528355, "clip_ratio/low_mean": 0.0013795145059702918, "clip_ratio/low_min": 0.00027673101794789545, "clip_ratio/region_mean": 0.002560056178481318, "epoch": 0.0039199902000245, "grad_norm": 0.12510091066360474, "learning_rate": 1e-06, "loss": -0.0059, "step": 42 }, { "clip_ratio/high_max": 0.002712288376642391, "clip_ratio/high_mean": 0.00125330957598635, "clip_ratio/low_mean": 0.0012523688092187513, "clip_ratio/low_min": 0.00017083998500311282, "clip_ratio/region_mean": 0.002505678407032974, "epoch": 0.004013323300025083, "grad_norm": 0.12959682941436768, "learning_rate": 1e-06, "loss": -0.0066, "step": 43 }, { "clip_ratio/high_max": 0.0025484361685812473, "clip_ratio/high_mean": 0.0010958715029119048, "clip_ratio/low_mean": 0.001375765403281548, "clip_ratio/low_min": 0.00032213253143709153, "clip_ratio/region_mean": 0.002471636929840315, "epoch": 0.004106656400025666, "grad_norm": 0.11655577272176743, "learning_rate": 1e-06, "loss": 0.0254, "step": 44 }, { "clip_ratio/high_max": 0.0023462956669391133, "clip_ratio/high_mean": 0.0010477971027285093, "clip_ratio/low_mean": 0.0013792256177111994, "clip_ratio/low_min": 0.00016807036263344344, "clip_ratio/region_mean": 0.002427022722258698, "epoch": 0.00419998950002625, "grad_norm": 0.12113282829523087, "learning_rate": 1e-06, "loss": 0.0709, "step": 45 }, { "clip_ratio/high_max": 0.0027967745627393015, "clip_ratio/high_mean": 0.0010746633743110579, "clip_ratio/low_mean": 0.001387392152537359, "clip_ratio/low_min": 0.00026867872020375216, "clip_ratio/region_mean": 0.0024620555341243744, "epoch": 0.004293322600026833, "grad_norm": 0.12973441183567047, "learning_rate": 1e-06, "loss": 0.0028, "step": 46 }, { "clip_ratio/high_max": 0.00203274275190779, "clip_ratio/high_mean": 0.0009612759522497072, "clip_ratio/low_mean": 0.0012365398688416462, "clip_ratio/low_min": 0.00011934027997995145, "clip_ratio/region_mean": 0.0021978158474666998, "epoch": 0.004386655700027417, "grad_norm": 0.1308363527059555, "learning_rate": 1e-06, "loss": -0.0234, "step": 47 }, { "clip_ratio/high_max": 0.002577153281890787, "clip_ratio/high_mean": 0.0011867203211295418, "clip_ratio/low_mean": 0.001367621840472566, "clip_ratio/low_min": 0.00017779837753550964, "clip_ratio/region_mean": 0.002554342136136256, "epoch": 0.004479988800028, "grad_norm": 0.11871274560689926, "learning_rate": 1e-06, "loss": 0.0074, "step": 48 }, { "clip_ratio/high_max": 0.0024006854655453935, "clip_ratio/high_mean": 0.001045826658810256, "clip_ratio/low_mean": 0.0014536851449520327, "clip_ratio/low_min": 0.00029494047703337856, "clip_ratio/region_mean": 0.0024995118292281404, "epoch": 0.004573321900028583, "grad_norm": 0.11289472877979279, "learning_rate": 1e-06, "loss": 0.0174, "step": 49 }, { "clip_ratio/high_max": 0.0025945085726561956, "clip_ratio/high_mean": 0.0012336107247392647, "clip_ratio/low_mean": 0.0014252222463255748, "clip_ratio/low_min": 0.0002270189115733956, "clip_ratio/region_mean": 0.002658832920133136, "epoch": 0.004666655000029166, "grad_norm": 0.12433332204818726, "learning_rate": 1e-06, "loss": -0.0122, "step": 50 }, { "clip_ratio/high_max": 0.002196036595705664, "clip_ratio/high_mean": 0.0010196405019087251, "clip_ratio/low_mean": 0.0014380335196619853, "clip_ratio/low_min": 0.00020265601779101416, "clip_ratio/region_mean": 0.002457673988828901, "epoch": 0.00475998810002975, "grad_norm": 0.12437108904123306, "learning_rate": 1e-06, "loss": 0.0449, "step": 51 }, { "clip_ratio/high_max": 0.0025174221154884435, "clip_ratio/high_mean": 0.0010574618390819523, "clip_ratio/low_mean": 0.0013778387219645083, "clip_ratio/low_min": 0.00031915157933326554, "clip_ratio/region_mean": 0.0024353005428565666, "epoch": 0.004853321200030333, "grad_norm": 0.1645129770040512, "learning_rate": 1e-06, "loss": 0.0026, "step": 52 }, { "clip_ratio/high_max": 0.0022650664468528703, "clip_ratio/high_mean": 0.000943411752814427, "clip_ratio/low_mean": 0.0014958809442759957, "clip_ratio/low_min": 0.0003346283510836656, "clip_ratio/region_mean": 0.002439292708004359, "epoch": 0.004946654300030917, "grad_norm": 0.1280021220445633, "learning_rate": 1e-06, "loss": 0.0719, "step": 53 }, { "clip_ratio/high_max": 0.002466234873281792, "clip_ratio/high_mean": 0.0010917734871327411, "clip_ratio/low_mean": 0.0015650268687750213, "clip_ratio/low_min": 0.00026476419770915527, "clip_ratio/region_mean": 0.002656800381373614, "epoch": 0.0050399874000315, "grad_norm": 0.1332934945821762, "learning_rate": 1e-06, "loss": 0.0661, "step": 54 }, { "clip_ratio/high_max": 0.0022577394629479386, "clip_ratio/high_mean": 0.0010745605613919906, "clip_ratio/low_mean": 0.001415130405803211, "clip_ratio/low_min": 0.000132281943479029, "clip_ratio/region_mean": 0.002489690996299032, "epoch": 0.005133320500032083, "grad_norm": 0.11874745786190033, "learning_rate": 1e-06, "loss": 0.0106, "step": 55 }, { "clip_ratio/high_max": 0.0022746101822122, "clip_ratio/high_mean": 0.0010714436393755022, "clip_ratio/low_mean": 0.0016711111493350472, "clip_ratio/low_min": 0.00027733490151149454, "clip_ratio/region_mean": 0.0027425547523307614, "epoch": 0.005226653600032666, "grad_norm": 0.12368755787611008, "learning_rate": 1e-06, "loss": 0.0225, "step": 56 }, { "clip_ratio/high_max": 0.002423052217636723, "clip_ratio/high_mean": 0.0010949523020826746, "clip_ratio/low_mean": 0.0013366991515795235, "clip_ratio/low_min": 0.00018049324626190355, "clip_ratio/region_mean": 0.002431651431834325, "epoch": 0.00531998670003325, "grad_norm": 0.1228901818394661, "learning_rate": 1e-06, "loss": -0.0322, "step": 57 }, { "clip_ratio/high_max": 0.002108056600263808, "clip_ratio/high_mean": 0.0010210948930762243, "clip_ratio/low_mean": 0.0015501944471907336, "clip_ratio/low_min": 0.00027131374463351676, "clip_ratio/region_mean": 0.002571289340266958, "epoch": 0.005413319800033833, "grad_norm": 2.066697359085083, "learning_rate": 1e-06, "loss": 0.0542, "step": 58 }, { "clip_ratio/high_max": 0.0020602999720722437, "clip_ratio/high_mean": 0.0009926433140208246, "clip_ratio/low_mean": 0.0016167068461072631, "clip_ratio/low_min": 0.0003623974162110244, "clip_ratio/region_mean": 0.0026093501510331407, "epoch": 0.005506652900034417, "grad_norm": 0.1115637943148613, "learning_rate": 1e-06, "loss": 0.0361, "step": 59 }, { "clip_ratio/high_max": 0.0023626167603651993, "clip_ratio/high_mean": 0.0010147511529794428, "clip_ratio/low_mean": 0.0015369821448985022, "clip_ratio/low_min": 0.00020909743761876598, "clip_ratio/region_mean": 0.002551733297877945, "epoch": 0.005599986000035, "grad_norm": 0.12371265143156052, "learning_rate": 1e-06, "loss": 0.027, "step": 60 }, { "clip_ratio/high_max": 0.003005859201948624, "clip_ratio/high_mean": 0.001278374755202094, "clip_ratio/low_mean": 0.0015204250266833697, "clip_ratio/low_min": 0.00013415809280559188, "clip_ratio/region_mean": 0.002798799738229718, "epoch": 0.005693319100035584, "grad_norm": 0.13408105075359344, "learning_rate": 1e-06, "loss": -0.0094, "step": 61 }, { "clip_ratio/high_max": 0.002455111301969737, "clip_ratio/high_mean": 0.0011070089421991725, "clip_ratio/low_mean": 0.001333669697487494, "clip_ratio/low_min": 0.000334344715156476, "clip_ratio/region_mean": 0.002440678632410709, "epoch": 0.005786652200036166, "grad_norm": 0.12359930574893951, "learning_rate": 1e-06, "loss": 0.0335, "step": 62 }, { "clip_ratio/high_max": 0.002831522113410756, "clip_ratio/high_mean": 0.0011042114783776924, "clip_ratio/low_mean": 0.0015467919256479945, "clip_ratio/low_min": 0.0003798273955908371, "clip_ratio/region_mean": 0.0026510033931117505, "epoch": 0.00587998530003675, "grad_norm": 0.11751088500022888, "learning_rate": 1e-06, "loss": 0.0195, "step": 63 }, { "clip_ratio/high_max": 0.0023007421332295053, "clip_ratio/high_mean": 0.0011200906992598902, "clip_ratio/low_mean": 0.0013016235207032878, "clip_ratio/low_min": 0.00026949078437610297, "clip_ratio/region_mean": 0.002421714219963178, "epoch": 0.005973318400037333, "grad_norm": 0.11680912226438522, "learning_rate": 1e-06, "loss": -0.0031, "step": 64 }, { "clip_ratio/high_max": 0.0023685664709773846, "clip_ratio/high_mean": 0.0011035526640625903, "clip_ratio/low_mean": 0.0016556819427933078, "clip_ratio/low_min": 0.0002963187589557492, "clip_ratio/region_mean": 0.002759234579571057, "epoch": 0.006066651500037917, "grad_norm": 0.12468979507684708, "learning_rate": 1e-06, "loss": -0.0098, "step": 65 }, { "clip_ratio/high_max": 0.002694370625249576, "clip_ratio/high_mean": 0.001144878155173501, "clip_ratio/low_mean": 0.0017936700824066065, "clip_ratio/low_min": 0.0004304626581870252, "clip_ratio/region_mean": 0.0029385482412180863, "epoch": 0.0061599846000385, "grad_norm": 0.13200826942920685, "learning_rate": 1e-06, "loss": 0.036, "step": 66 }, { "clip_ratio/high_max": 0.0025700881524244323, "clip_ratio/high_mean": 0.0011668774495774414, "clip_ratio/low_mean": 0.001772287880157819, "clip_ratio/low_min": 0.00022172380340634845, "clip_ratio/region_mean": 0.0029391653515631333, "epoch": 0.0062533177000390835, "grad_norm": 0.12837441265583038, "learning_rate": 1e-06, "loss": 0.0477, "step": 67 }, { "clip_ratio/high_max": 0.0025398676152690314, "clip_ratio/high_mean": 0.0010521953736315481, "clip_ratio/low_mean": 0.0014053867671464104, "clip_ratio/low_min": 0.00016638472425256623, "clip_ratio/region_mean": 0.0024575820862082765, "epoch": 0.006346650800039666, "grad_norm": 0.12504403293132782, "learning_rate": 1e-06, "loss": 0.0239, "step": 68 }, { "clip_ratio/high_max": 0.002294306097610388, "clip_ratio/high_mean": 0.0010584088922769297, "clip_ratio/low_mean": 0.0017622818013478536, "clip_ratio/low_min": 0.0003273519287176896, "clip_ratio/region_mean": 0.002820690620865207, "epoch": 0.00643998390004025, "grad_norm": 0.12804903090000153, "learning_rate": 1e-06, "loss": 0.0151, "step": 69 }, { "clip_ratio/high_max": 0.002505788674170617, "clip_ratio/high_mean": 0.0011982092582911719, "clip_ratio/low_mean": 0.001765437627909705, "clip_ratio/low_min": 0.0004004208240075968, "clip_ratio/region_mean": 0.0029636468534590676, "epoch": 0.006533317000040833, "grad_norm": 0.1482425183057785, "learning_rate": 1e-06, "loss": -0.0027, "step": 70 }, { "clip_ratio/high_max": 0.002191903164202813, "clip_ratio/high_mean": 0.0010576176973700058, "clip_ratio/low_mean": 0.0016184864616661798, "clip_ratio/low_min": 0.00029498931053240085, "clip_ratio/region_mean": 0.0026761041081044823, "epoch": 0.0066266501000414165, "grad_norm": 0.11269011348485947, "learning_rate": 1e-06, "loss": 0.0245, "step": 71 }, { "clip_ratio/high_max": 0.0023948498055688106, "clip_ratio/high_mean": 0.0011416518391342834, "clip_ratio/low_mean": 0.001558263975312002, "clip_ratio/low_min": 0.0003109849458269309, "clip_ratio/region_mean": 0.002699915821722243, "epoch": 0.006719983200042, "grad_norm": 0.12281899154186249, "learning_rate": 1e-06, "loss": -0.0014, "step": 72 }, { "clip_ratio/high_max": 0.0022177253194968216, "clip_ratio/high_mean": 0.0010973968346661422, "clip_ratio/low_mean": 0.0019604489934863523, "clip_ratio/low_min": 0.00027768588824983453, "clip_ratio/region_mean": 0.003057845853618346, "epoch": 0.0068133163000425835, "grad_norm": 0.1317450851202011, "learning_rate": 1e-06, "loss": 0.034, "step": 73 }, { "clip_ratio/high_max": 0.0022507070752908476, "clip_ratio/high_mean": 0.0009198605457640951, "clip_ratio/low_mean": 0.0015583668646286242, "clip_ratio/low_min": 0.00020644260621338617, "clip_ratio/region_mean": 0.002478227390383836, "epoch": 0.006906649400043167, "grad_norm": 0.12079409509897232, "learning_rate": 1e-06, "loss": 0.0684, "step": 74 }, { "clip_ratio/high_max": 0.0024843343926477246, "clip_ratio/high_mean": 0.0011641666824289132, "clip_ratio/low_mean": 0.001618807582417503, "clip_ratio/low_min": 0.00015905514101177687, "clip_ratio/region_mean": 0.0027829742248286493, "epoch": 0.0069999825000437495, "grad_norm": 0.1247481182217598, "learning_rate": 1e-06, "loss": 0.0319, "step": 75 }, { "clip_ratio/high_max": 0.002368537876463961, "clip_ratio/high_mean": 0.0010069880518130958, "clip_ratio/low_mean": 0.0016674303624313325, "clip_ratio/low_min": 0.0003192582225892693, "clip_ratio/region_mean": 0.0026744183560367674, "epoch": 0.007093315600044333, "grad_norm": 0.12213882058858871, "learning_rate": 1e-06, "loss": 0.0681, "step": 76 }, { "clip_ratio/high_max": 0.002555884471803438, "clip_ratio/high_mean": 0.0010987444838974625, "clip_ratio/low_mean": 0.001698022650089115, "clip_ratio/low_min": 0.00027045703791372944, "clip_ratio/region_mean": 0.0027967671267106198, "epoch": 0.0071866487000449165, "grad_norm": 0.12074452638626099, "learning_rate": 1e-06, "loss": 0.0465, "step": 77 }, { "clip_ratio/high_max": 0.002588353760074824, "clip_ratio/high_mean": 0.001214584946865216, "clip_ratio/low_mean": 0.0015450818609679118, "clip_ratio/low_min": 0.0002666832497197902, "clip_ratio/region_mean": 0.0027596667569014244, "epoch": 0.0072799818000455, "grad_norm": 0.12084449827671051, "learning_rate": 1e-06, "loss": -0.0297, "step": 78 }, { "clip_ratio/high_max": 0.002571704411820974, "clip_ratio/high_mean": 0.0011780707700381754, "clip_ratio/low_mean": 0.0017109721884480678, "clip_ratio/low_min": 0.00018040862050838768, "clip_ratio/region_mean": 0.002889042931201402, "epoch": 0.007373314900046083, "grad_norm": 0.12859264016151428, "learning_rate": 1e-06, "loss": 0.0135, "step": 79 }, { "clip_ratio/high_max": 0.002559574968472589, "clip_ratio/high_mean": 0.0010937461302091833, "clip_ratio/low_mean": 0.0018106154093402438, "clip_ratio/low_min": 0.00022208099017007044, "clip_ratio/region_mean": 0.002904361601395067, "epoch": 0.007466648000046667, "grad_norm": 0.13039356470108032, "learning_rate": 1e-06, "loss": 0.0142, "step": 80 }, { "clip_ratio/high_max": 0.0024957659406936727, "clip_ratio/high_mean": 0.0011449147241364699, "clip_ratio/low_mean": 0.0017148948754766025, "clip_ratio/low_min": 0.00038437886996689485, "clip_ratio/region_mean": 0.0028598095741472207, "epoch": 0.0075599811000472495, "grad_norm": 0.1267310231924057, "learning_rate": 1e-06, "loss": -0.0168, "step": 81 }, { "clip_ratio/high_max": 0.0024354822526220232, "clip_ratio/high_mean": 0.0010882727219723165, "clip_ratio/low_mean": 0.002038623635598924, "clip_ratio/low_min": 0.00044295336192590185, "clip_ratio/region_mean": 0.0031268963575712405, "epoch": 0.007653314200047833, "grad_norm": 0.11634138226509094, "learning_rate": 1e-06, "loss": 0.0617, "step": 82 }, { "clip_ratio/high_max": 0.0024210622650571167, "clip_ratio/high_mean": 0.0011834132055810187, "clip_ratio/low_mean": 0.0018665518437046558, "clip_ratio/low_min": 0.00018165723258789512, "clip_ratio/region_mean": 0.00304996500199195, "epoch": 0.007746647300048416, "grad_norm": 0.11252500861883163, "learning_rate": 1e-06, "loss": 0.0446, "step": 83 }, { "clip_ratio/high_max": 0.0023836860636947677, "clip_ratio/high_mean": 0.0011186951960553415, "clip_ratio/low_mean": 0.0017712138396746013, "clip_ratio/low_min": 0.00030266990324889775, "clip_ratio/region_mean": 0.0028899090248160064, "epoch": 0.007839980400049, "grad_norm": 0.12852348387241364, "learning_rate": 1e-06, "loss": 0.0135, "step": 84 }, { "clip_ratio/high_max": 0.0024267022527055815, "clip_ratio/high_mean": 0.0011336017669236753, "clip_ratio/low_mean": 0.001672986640187446, "clip_ratio/low_min": 0.00027634374146146, "clip_ratio/region_mean": 0.0028065883816452697, "epoch": 0.007933313500049582, "grad_norm": 0.12799330055713654, "learning_rate": 1e-06, "loss": 0.0108, "step": 85 }, { "clip_ratio/high_max": 0.0025601494053262286, "clip_ratio/high_mean": 0.0012128133821533993, "clip_ratio/low_mean": 0.001678594449913362, "clip_ratio/low_min": 1.711391087155789e-05, "clip_ratio/region_mean": 0.0028914077120134607, "epoch": 0.008026646600050166, "grad_norm": 0.13526631891727448, "learning_rate": 1e-06, "loss": -0.0123, "step": 86 }, { "clip_ratio/high_max": 0.0025152130547212437, "clip_ratio/high_mean": 0.00105414194331388, "clip_ratio/low_mean": 0.0017104383950936608, "clip_ratio/low_min": 0.0002389557321293978, "clip_ratio/region_mean": 0.002764580334769562, "epoch": 0.00811997970005075, "grad_norm": 0.12694324553012848, "learning_rate": 1e-06, "loss": 0.0426, "step": 87 }, { "clip_ratio/high_max": 0.002423869627818931, "clip_ratio/high_mean": 0.0010796450042107608, "clip_ratio/low_mean": 0.0016469793336000293, "clip_ratio/low_min": 0.0003427411829761695, "clip_ratio/region_mean": 0.0027266243123449385, "epoch": 0.008213312800051333, "grad_norm": 0.11928199976682663, "learning_rate": 1e-06, "loss": 0.014, "step": 88 }, { "clip_ratio/high_max": 0.0028011376707581803, "clip_ratio/high_mean": 0.001124978814914357, "clip_ratio/low_mean": 0.0017385409446433187, "clip_ratio/low_min": 0.0004417475101945456, "clip_ratio/region_mean": 0.0028635197231778875, "epoch": 0.008306645900051916, "grad_norm": 0.12897472083568573, "learning_rate": 1e-06, "loss": 0.0174, "step": 89 }, { "clip_ratio/high_max": 0.00299468948651338, "clip_ratio/high_mean": 0.0012471801856008824, "clip_ratio/low_mean": 0.0015908674686215818, "clip_ratio/low_min": 0.00015413759319926612, "clip_ratio/region_mean": 0.002838047694240231, "epoch": 0.0083999790000525, "grad_norm": 0.1397995501756668, "learning_rate": 1e-06, "loss": 0.0509, "step": 90 }, { "clip_ratio/high_max": 0.0024705522664589807, "clip_ratio/high_mean": 0.0011441953447501874, "clip_ratio/low_mean": 0.0015895076357992366, "clip_ratio/low_min": 0.00030276219695224427, "clip_ratio/region_mean": 0.0027337030405760743, "epoch": 0.008493312100053083, "grad_norm": 0.1318233609199524, "learning_rate": 1e-06, "loss": 0.0043, "step": 91 }, { "clip_ratio/high_max": 0.002432436573144514, "clip_ratio/high_mean": 0.0011215947088203393, "clip_ratio/low_mean": 0.0016778742101450916, "clip_ratio/low_min": 0.00024281909190904116, "clip_ratio/region_mean": 0.002799469046294689, "epoch": 0.008586645200053667, "grad_norm": 0.12331629544496536, "learning_rate": 1e-06, "loss": 0.0251, "step": 92 }, { "clip_ratio/high_max": 0.0027539411166799255, "clip_ratio/high_mean": 0.0012068548494426068, "clip_ratio/low_mean": 0.001572672488691751, "clip_ratio/low_min": 0.00018596320569486124, "clip_ratio/region_mean": 0.0027795273199444637, "epoch": 0.00867997830005425, "grad_norm": 0.11946596205234528, "learning_rate": 1e-06, "loss": 0.0072, "step": 93 }, { "clip_ratio/high_max": 0.002665993459231686, "clip_ratio/high_mean": 0.001243518836417934, "clip_ratio/low_mean": 0.0016472251300001517, "clip_ratio/low_min": 0.00028363862384139793, "clip_ratio/region_mean": 0.0028907440428156406, "epoch": 0.008773311400054834, "grad_norm": 0.12517011165618896, "learning_rate": 1e-06, "loss": -0.0061, "step": 94 }, { "clip_ratio/high_max": 0.0022710794837621506, "clip_ratio/high_mean": 0.001101587688026484, "clip_ratio/low_mean": 0.001575810441863723, "clip_ratio/low_min": 0.00016782256079750368, "clip_ratio/region_mean": 0.002677398137166165, "epoch": 0.008866644500055417, "grad_norm": 0.12414906919002533, "learning_rate": 1e-06, "loss": -0.0258, "step": 95 }, { "clip_ratio/high_max": 0.0023688660767220426, "clip_ratio/high_mean": 0.0011477822736196686, "clip_ratio/low_mean": 0.0015816522791283205, "clip_ratio/low_min": 0.00018785280281008454, "clip_ratio/region_mean": 0.002729434512730222, "epoch": 0.008959977600056, "grad_norm": 0.12111297249794006, "learning_rate": 1e-06, "loss": 0.0102, "step": 96 }, { "clip_ratio/high_max": 0.0024623051212984137, "clip_ratio/high_mean": 0.0011101971431344282, "clip_ratio/low_mean": 0.001513851406343747, "clip_ratio/low_min": 0.00027694273103406886, "clip_ratio/region_mean": 0.0026240485749440268, "epoch": 0.009053310700056582, "grad_norm": 0.11820439994335175, "learning_rate": 1e-06, "loss": 0.0104, "step": 97 }, { "clip_ratio/high_max": 0.002735242494964041, "clip_ratio/high_mean": 0.0011144193340442143, "clip_ratio/low_mean": 0.0019397073156142142, "clip_ratio/low_min": 0.00027727985798264854, "clip_ratio/region_mean": 0.0030541266896761954, "epoch": 0.009146643800057166, "grad_norm": 0.11758563667535782, "learning_rate": 1e-06, "loss": -0.0107, "step": 98 }, { "clip_ratio/high_max": 0.0024884922459023073, "clip_ratio/high_mean": 0.001027773903842899, "clip_ratio/low_mean": 0.0019888922324753366, "clip_ratio/low_min": 0.0003608038427955762, "clip_ratio/region_mean": 0.0030166661163093522, "epoch": 0.00923997690005775, "grad_norm": 0.11934468150138855, "learning_rate": 1e-06, "loss": 0.0577, "step": 99 }, { "clip_ratio/high_max": 0.002381966100074351, "clip_ratio/high_mean": 0.0010493788031453732, "clip_ratio/low_mean": 0.0019476753514027223, "clip_ratio/low_min": 0.00017321978066320298, "clip_ratio/region_mean": 0.002997054165462032, "epoch": 0.009333310000058333, "grad_norm": 0.6169129014015198, "learning_rate": 1e-06, "loss": 0.0484, "step": 100 }, { "clip_ratio/high_max": 0.0022514205775223672, "clip_ratio/high_mean": 0.0009954330635082442, "clip_ratio/low_mean": 0.0018879740237025544, "clip_ratio/low_min": 0.00021392679627751932, "clip_ratio/region_mean": 0.002883407098124735, "epoch": 0.009426643100058916, "grad_norm": 0.12227354198694229, "learning_rate": 1e-06, "loss": 0.0531, "step": 101 }, { "clip_ratio/high_max": 0.002587458031484857, "clip_ratio/high_mean": 0.0012456623171601677, "clip_ratio/low_mean": 0.0021699201897718012, "clip_ratio/low_min": 0.00022070783052186016, "clip_ratio/region_mean": 0.0034155824832851067, "epoch": 0.0095199762000595, "grad_norm": 0.13665755093097687, "learning_rate": 1e-06, "loss": 0.0284, "step": 102 }, { "clip_ratio/high_max": 0.002236638232716359, "clip_ratio/high_mean": 0.0010470595807419159, "clip_ratio/low_mean": 0.001851815792178968, "clip_ratio/low_min": 0.00022481873475044267, "clip_ratio/region_mean": 0.0028988753765588626, "epoch": 0.009613309300060083, "grad_norm": 0.12030473351478577, "learning_rate": 1e-06, "loss": 0.018, "step": 103 }, { "clip_ratio/high_max": 0.0022463720888481475, "clip_ratio/high_mean": 0.000988552266790066, "clip_ratio/low_mean": 0.0017420161966583692, "clip_ratio/low_min": 0.0004026728001917945, "clip_ratio/region_mean": 0.0027305684561724775, "epoch": 0.009706642400060667, "grad_norm": 0.12268640846014023, "learning_rate": 1e-06, "loss": 0.0166, "step": 104 }, { "clip_ratio/high_max": 0.002458012240822427, "clip_ratio/high_mean": 0.0010333459977118764, "clip_ratio/low_mean": 0.0020641325318138115, "clip_ratio/low_min": 0.0002868674846467911, "clip_ratio/region_mean": 0.003097478547715582, "epoch": 0.00979997550006125, "grad_norm": 0.12414582818746567, "learning_rate": 1e-06, "loss": 0.0135, "step": 105 }, { "clip_ratio/high_max": 0.0026661057345336303, "clip_ratio/high_mean": 0.0011742309179680888, "clip_ratio/low_mean": 0.002047063160716789, "clip_ratio/low_min": 0.00025208337501680944, "clip_ratio/region_mean": 0.0032212939840974286, "epoch": 0.009893308600061834, "grad_norm": 0.12151814252138138, "learning_rate": 1e-06, "loss": 0.0208, "step": 106 }, { "clip_ratio/high_max": 0.002588125367765315, "clip_ratio/high_mean": 0.0011614342729444616, "clip_ratio/low_mean": 0.001879544579423964, "clip_ratio/low_min": 0.00039051946077961475, "clip_ratio/region_mean": 0.003040978845092468, "epoch": 0.009986641700062417, "grad_norm": 0.12045180052518845, "learning_rate": 1e-06, "loss": 0.0283, "step": 107 }, { "clip_ratio/high_max": 0.002477358590112999, "clip_ratio/high_mean": 0.001163388958957512, "clip_ratio/low_mean": 0.0019053846772294492, "clip_ratio/low_min": 0.00023204635908768978, "clip_ratio/region_mean": 0.0030687737016705796, "epoch": 0.010079974800063, "grad_norm": 0.12709839642047882, "learning_rate": 1e-06, "loss": 0.0223, "step": 108 }, { "clip_ratio/high_max": 0.0024174013669835404, "clip_ratio/high_mean": 0.0011152332481287885, "clip_ratio/low_mean": 0.0019080139172729105, "clip_ratio/low_min": 0.00030921201778255636, "clip_ratio/region_mean": 0.003023247219971381, "epoch": 0.010173307900063584, "grad_norm": 0.12614239752292633, "learning_rate": 1e-06, "loss": 0.031, "step": 109 }, { "clip_ratio/high_max": 0.002114980947226286, "clip_ratio/high_mean": 0.0011003297768183984, "clip_ratio/low_mean": 0.002239013585494831, "clip_ratio/low_min": 0.00033608295962039847, "clip_ratio/region_mean": 0.003339343369589187, "epoch": 0.010266641000064166, "grad_norm": 0.13278627395629883, "learning_rate": 1e-06, "loss": 0.0426, "step": 110 }, { "clip_ratio/high_max": 0.002821992071403656, "clip_ratio/high_mean": 0.001197408088046359, "clip_ratio/low_mean": 0.0019378728247829713, "clip_ratio/low_min": 0.00033207244086952414, "clip_ratio/region_mean": 0.003135280894639436, "epoch": 0.010359974100064749, "grad_norm": 0.12661196291446686, "learning_rate": 1e-06, "loss": 0.0205, "step": 111 }, { "clip_ratio/high_max": 0.0028136772307334468, "clip_ratio/high_mean": 0.0012724134267045883, "clip_ratio/low_mean": 0.001824662227591034, "clip_ratio/low_min": 0.0002584917856438551, "clip_ratio/region_mean": 0.0030970755906309932, "epoch": 0.010453307200065333, "grad_norm": 0.18222671747207642, "learning_rate": 1e-06, "loss": -0.0116, "step": 112 }, { "clip_ratio/high_max": 0.0030055799943511374, "clip_ratio/high_mean": 0.0013276239224069286, "clip_ratio/low_mean": 0.002040242121438496, "clip_ratio/low_min": 0.0003637545923993457, "clip_ratio/region_mean": 0.0033678660402074456, "epoch": 0.010546640300065916, "grad_norm": 0.13751061260700226, "learning_rate": 1e-06, "loss": 0.0025, "step": 113 }, { "clip_ratio/high_max": 0.002621527215524111, "clip_ratio/high_mean": 0.0012256510999577586, "clip_ratio/low_mean": 0.0019793871069850866, "clip_ratio/low_min": 0.0002253668808407383, "clip_ratio/region_mean": 0.003205038323358167, "epoch": 0.0106399734000665, "grad_norm": 0.13413459062576294, "learning_rate": 1e-06, "loss": -0.0018, "step": 114 }, { "clip_ratio/high_max": 0.0028729236510116607, "clip_ratio/high_mean": 0.0012719973692583153, "clip_ratio/low_mean": 0.0018577749287942424, "clip_ratio/low_min": 0.0001991901717701694, "clip_ratio/region_mean": 0.003129772303509526, "epoch": 0.010733306500067083, "grad_norm": 0.13839739561080933, "learning_rate": 1e-06, "loss": 0.0186, "step": 115 }, { "clip_ratio/high_max": 0.0026556626762612723, "clip_ratio/high_mean": 0.0011971744133916218, "clip_ratio/low_mean": 0.001761995896231383, "clip_ratio/low_min": 0.00018820153218257474, "clip_ratio/region_mean": 0.0029591703350888565, "epoch": 0.010826639600067666, "grad_norm": 0.12764184176921844, "learning_rate": 1e-06, "loss": 0.0146, "step": 116 }, { "clip_ratio/high_max": 0.0023235606131493114, "clip_ratio/high_mean": 0.001061090937582776, "clip_ratio/low_mean": 0.002012656448641792, "clip_ratio/low_min": 0.0001853870944614755, "clip_ratio/region_mean": 0.0030737474298803136, "epoch": 0.01091997270006825, "grad_norm": 0.12334010004997253, "learning_rate": 1e-06, "loss": 0.0619, "step": 117 }, { "clip_ratio/high_max": 0.0030490902572637424, "clip_ratio/high_mean": 0.0013991236046422273, "clip_ratio/low_mean": 0.0021201470735832117, "clip_ratio/low_min": 0.0002730680898821447, "clip_ratio/region_mean": 0.003519270714605227, "epoch": 0.011013305800068833, "grad_norm": 0.404316782951355, "learning_rate": 1e-06, "loss": -0.0199, "step": 118 }, { "clip_ratio/high_max": 0.002557805804826785, "clip_ratio/high_mean": 0.0011218647996429354, "clip_ratio/low_mean": 0.0017191165243275464, "clip_ratio/low_min": 0.0001289654164793319, "clip_ratio/region_mean": 0.002840981338522397, "epoch": 0.011106638900069417, "grad_norm": 0.12503401935100555, "learning_rate": 1e-06, "loss": -0.0002, "step": 119 }, { "clip_ratio/high_max": 0.0027102772874059156, "clip_ratio/high_mean": 0.0011512573546497151, "clip_ratio/low_mean": 0.002133196932845749, "clip_ratio/low_min": 0.0004344923236203613, "clip_ratio/region_mean": 0.0032844543602550402, "epoch": 0.01119997200007, "grad_norm": 0.12219876796007156, "learning_rate": 1e-06, "loss": 0.0122, "step": 120 }, { "clip_ratio/high_max": 0.0025009219170897268, "clip_ratio/high_mean": 0.0010768282081698999, "clip_ratio/low_mean": 0.0021460636162373703, "clip_ratio/low_min": 0.0002805102385536884, "clip_ratio/region_mean": 0.003222891842597164, "epoch": 0.011293305100070584, "grad_norm": 0.12973983585834503, "learning_rate": 1e-06, "loss": 0.0052, "step": 121 }, { "clip_ratio/high_max": 0.0025484035577392206, "clip_ratio/high_mean": 0.0011680889656418003, "clip_ratio/low_mean": 0.002039420240180334, "clip_ratio/low_min": 0.000354983488250582, "clip_ratio/region_mean": 0.0032075092458399013, "epoch": 0.011386638200071167, "grad_norm": 0.8162333369255066, "learning_rate": 1e-06, "loss": 0.0448, "step": 122 }, { "clip_ratio/high_max": 0.002766628058452625, "clip_ratio/high_mean": 0.0013111048210703302, "clip_ratio/low_mean": 0.002181306299462449, "clip_ratio/low_min": 0.00037754039840365294, "clip_ratio/region_mean": 0.0034924111532745883, "epoch": 0.011479971300071749, "grad_norm": 0.13874521851539612, "learning_rate": 1e-06, "loss": -0.0215, "step": 123 }, { "clip_ratio/high_max": 0.0030544891633326188, "clip_ratio/high_mean": 0.0013875375298084691, "clip_ratio/low_mean": 0.0020280593271309044, "clip_ratio/low_min": 0.00028534032389870845, "clip_ratio/region_mean": 0.0034155968605773523, "epoch": 0.011573304400072332, "grad_norm": 0.13649602234363556, "learning_rate": 1e-06, "loss": -0.0329, "step": 124 }, { "clip_ratio/high_max": 0.0023659215730731376, "clip_ratio/high_mean": 0.0011312648875900777, "clip_ratio/low_mean": 0.0020022026074002497, "clip_ratio/low_min": 0.00019996420724055497, "clip_ratio/region_mean": 0.0031334675077232532, "epoch": 0.011666637500072916, "grad_norm": 0.12368867546319962, "learning_rate": 1e-06, "loss": 0.0, "step": 125 }, { "clip_ratio/high_max": 0.0024065821125986986, "clip_ratio/high_mean": 0.0011076565897383261, "clip_ratio/low_mean": 0.0019181899551767856, "clip_ratio/low_min": 0.00023527296434622258, "clip_ratio/region_mean": 0.003025846541277133, "epoch": 0.0117599706000735, "grad_norm": 0.13523416221141815, "learning_rate": 1e-06, "loss": 0.027, "step": 126 }, { "clip_ratio/high_max": 0.0024364546334254555, "clip_ratio/high_mean": 0.001143281020631548, "clip_ratio/low_mean": 0.0018475305842002854, "clip_ratio/low_min": 0.0001368756757074152, "clip_ratio/region_mean": 0.002990811612107791, "epoch": 0.011853303700074083, "grad_norm": 0.12084290385246277, "learning_rate": 1e-06, "loss": -0.0238, "step": 127 }, { "clip_ratio/high_max": 0.0024855052542989142, "clip_ratio/high_mean": 0.0010713422670960426, "clip_ratio/low_mean": 0.002105702609696891, "clip_ratio/low_min": 0.000371893281226221, "clip_ratio/region_mean": 0.0031770448258612305, "epoch": 0.011946636800074666, "grad_norm": 0.12129528075456619, "learning_rate": 1e-06, "loss": 0.0562, "step": 128 }, { "clip_ratio/high_max": 0.002161408141546417, "clip_ratio/high_mean": 0.0008277388988062739, "clip_ratio/low_mean": 0.0008564443451177794, "clip_ratio/low_min": 7.493534849345451e-05, "clip_ratio/region_mean": 0.0016841832475620322, "completions/clipped_ratio": 0.013968331473214302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 614.7697143554688, "completions/mean_terminated_length": 565.4537963867188, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.01203996990007525, "grad_norm": 0.11894074827432632, "learning_rate": 1e-06, "loss": 0.0127, "num_tokens": 162389510.0, "reward": 0.5611311197280884, "reward_std": 0.20702067017555237, "rewards/simpleverify_reward/mean": 0.5611310601234436, "rewards/simpleverify_reward/std": 0.49625107645988464, "step": 129 }, { "clip_ratio/high_max": 0.0019244481445639394, "clip_ratio/high_mean": 0.0008159040826285491, "clip_ratio/low_mean": 0.0009070359938050387, "clip_ratio/low_min": 4.257251930539496e-05, "clip_ratio/region_mean": 0.0017229400400537997, "epoch": 0.012133303000075833, "grad_norm": 0.09997950494289398, "learning_rate": 1e-06, "loss": 0.0186, "step": 130 }, { "clip_ratio/high_max": 0.001922179748362396, "clip_ratio/high_mean": 0.0007555869933639769, "clip_ratio/low_mean": 0.0009793227145564742, "clip_ratio/low_min": 9.517786565993447e-05, "clip_ratio/region_mean": 0.0017349096815451048, "epoch": 0.012226636100076417, "grad_norm": 0.10746411979198456, "learning_rate": 1e-06, "loss": 0.0676, "step": 131 }, { "clip_ratio/high_max": 0.0025278526372858323, "clip_ratio/high_mean": 0.0009213798912242055, "clip_ratio/low_mean": 0.0008448825083178235, "clip_ratio/low_min": 5.5044874898158014e-05, "clip_ratio/region_mean": 0.0017662623649812303, "epoch": 0.012319969200077, "grad_norm": 0.10569746047258377, "learning_rate": 1e-06, "loss": -0.01, "step": 132 }, { "clip_ratio/high_max": 0.0027123287218273617, "clip_ratio/high_mean": 0.0010814159759320319, "clip_ratio/low_mean": 0.0008209372754208744, "clip_ratio/low_min": 0.00010357579776609782, "clip_ratio/region_mean": 0.0019023532659048215, "epoch": 0.012413302300077584, "grad_norm": 0.10641106963157654, "learning_rate": 1e-06, "loss": -0.0059, "step": 133 }, { "clip_ratio/high_max": 0.0023935708159115165, "clip_ratio/high_mean": 0.0009718834226077888, "clip_ratio/low_mean": 0.000910684108021087, "clip_ratio/low_min": 5.389281614043284e-05, "clip_ratio/region_mean": 0.0018825675433618017, "epoch": 0.012506635400078167, "grad_norm": 0.10697611421346664, "learning_rate": 1e-06, "loss": 0.003, "step": 134 }, { "clip_ratio/high_max": 0.0025179963413393125, "clip_ratio/high_mean": 0.0009670342951721977, "clip_ratio/low_mean": 0.0008859125573508209, "clip_ratio/low_min": 7.011599427642068e-05, "clip_ratio/region_mean": 0.0018529468434280716, "epoch": 0.01259996850007875, "grad_norm": 0.12203706055879593, "learning_rate": 1e-06, "loss": 0.0459, "step": 135 }, { "clip_ratio/high_max": 0.002006823677220382, "clip_ratio/high_mean": 0.0009237128906534053, "clip_ratio/low_mean": 0.0009380030278407503, "clip_ratio/low_min": 8.04917945060879e-05, "clip_ratio/region_mean": 0.001861715929408092, "epoch": 0.012693301600079332, "grad_norm": 0.11572477221488953, "learning_rate": 1e-06, "loss": 0.0349, "step": 136 }, { "clip_ratio/high_max": 0.0022455390280811116, "clip_ratio/high_mean": 0.0009581133708707057, "clip_ratio/low_mean": 0.0008806293171801371, "clip_ratio/low_min": 6.871248660900164e-05, "clip_ratio/region_mean": 0.001838742675317917, "epoch": 0.012786634700079916, "grad_norm": 0.1089639663696289, "learning_rate": 1e-06, "loss": 0.0145, "step": 137 }, { "clip_ratio/high_max": 0.0023253438848769292, "clip_ratio/high_mean": 0.0009870462818071246, "clip_ratio/low_mean": 0.0008008372496988159, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017878835933515802, "epoch": 0.0128799678000805, "grad_norm": 0.11664681881666183, "learning_rate": 1e-06, "loss": -0.006, "step": 138 }, { "clip_ratio/high_max": 0.002125671318935929, "clip_ratio/high_mean": 0.000952323382080067, "clip_ratio/low_mean": 0.0010867085657082498, "clip_ratio/low_min": 0.00023532289833383402, "clip_ratio/region_mean": 0.0020390319114085287, "epoch": 0.012973300900081083, "grad_norm": 0.11493740975856781, "learning_rate": 1e-06, "loss": 0.0563, "step": 139 }, { "clip_ratio/high_max": 0.0018843048746930435, "clip_ratio/high_mean": 0.0008160637844412122, "clip_ratio/low_mean": 0.0009262105268135201, "clip_ratio/low_min": 6.60446557958494e-05, "clip_ratio/region_mean": 0.0017422743330826052, "epoch": 0.013066634000081666, "grad_norm": 0.10611748695373535, "learning_rate": 1e-06, "loss": 0.0235, "step": 140 }, { "clip_ratio/high_max": 0.002287157425598707, "clip_ratio/high_mean": 0.0009172473055514274, "clip_ratio/low_mean": 0.0010591678183118347, "clip_ratio/low_min": 0.00019992587203887524, "clip_ratio/region_mean": 0.001976415158424061, "epoch": 0.01315996710008225, "grad_norm": 0.11522714048624039, "learning_rate": 1e-06, "loss": 0.0426, "step": 141 }, { "clip_ratio/high_max": 0.001953459155629389, "clip_ratio/high_mean": 0.0008758882304391591, "clip_ratio/low_mean": 0.001051960689437692, "clip_ratio/low_min": 0.00013661285265698098, "clip_ratio/region_mean": 0.001927848905324936, "epoch": 0.013253300200082833, "grad_norm": 0.11635539680719376, "learning_rate": 1e-06, "loss": 0.0604, "step": 142 }, { "clip_ratio/high_max": 0.0019318033482704777, "clip_ratio/high_mean": 0.0007581848458357854, "clip_ratio/low_mean": 0.0011441625974839553, "clip_ratio/low_min": 0.00018216911485069431, "clip_ratio/region_mean": 0.0019023473578272387, "epoch": 0.013346633300083417, "grad_norm": 0.10934232920408249, "learning_rate": 1e-06, "loss": 0.0441, "step": 143 }, { "clip_ratio/high_max": 0.002223715218860889, "clip_ratio/high_mean": 0.0009109926231758436, "clip_ratio/low_mean": 0.0010616847721394151, "clip_ratio/low_min": 0.00012821391919715097, "clip_ratio/region_mean": 0.0019726774335140362, "epoch": 0.013439966400084, "grad_norm": 0.11126284301280975, "learning_rate": 1e-06, "loss": 0.0719, "step": 144 }, { "clip_ratio/high_max": 0.0022930012273718603, "clip_ratio/high_mean": 0.0009078173316083848, "clip_ratio/low_mean": 0.0010419095087854657, "clip_ratio/low_min": 0.0001388745340591413, "clip_ratio/region_mean": 0.0019497268222039565, "epoch": 0.013533299500084583, "grad_norm": 0.11850086599588394, "learning_rate": 1e-06, "loss": 0.0188, "step": 145 }, { "clip_ratio/high_max": 0.0023042570537654683, "clip_ratio/high_mean": 0.0009658703602326568, "clip_ratio/low_mean": 0.0009650740394135937, "clip_ratio/low_min": 8.241498926508939e-05, "clip_ratio/region_mean": 0.001930944352352526, "epoch": 0.013626632600085167, "grad_norm": 0.10880748927593231, "learning_rate": 1e-06, "loss": 0.0257, "step": 146 }, { "clip_ratio/high_max": 0.0021564784183283336, "clip_ratio/high_mean": 0.0009590455920260865, "clip_ratio/low_mean": 0.0009871662841760553, "clip_ratio/low_min": 0.00018428189378028037, "clip_ratio/region_mean": 0.001946211894392036, "epoch": 0.01371996570008575, "grad_norm": 0.11974553018808365, "learning_rate": 1e-06, "loss": -0.0055, "step": 147 }, { "clip_ratio/high_max": 0.0029374732985161245, "clip_ratio/high_mean": 0.001042643802065868, "clip_ratio/low_mean": 0.001084575844288338, "clip_ratio/low_min": 5.8345849538454786e-05, "clip_ratio/region_mean": 0.002127219660906121, "epoch": 0.013813298800086334, "grad_norm": 0.1198730543255806, "learning_rate": 1e-06, "loss": 0.0137, "step": 148 }, { "clip_ratio/high_max": 0.0023015585938992444, "clip_ratio/high_mean": 0.0008549333106202539, "clip_ratio/low_mean": 0.001007696868327912, "clip_ratio/low_min": 4.690584864874836e-05, "clip_ratio/region_mean": 0.0018626301316544414, "epoch": 0.013906631900086916, "grad_norm": 0.3750462830066681, "learning_rate": 1e-06, "loss": 0.0223, "step": 149 }, { "clip_ratio/high_max": 0.0021754269546363503, "clip_ratio/high_mean": 0.0008916510378185194, "clip_ratio/low_mean": 0.0011523259854584467, "clip_ratio/low_min": 5.7523982832208276e-05, "clip_ratio/region_mean": 0.0020439770742086694, "epoch": 0.013999965000087499, "grad_norm": 0.1076037660241127, "learning_rate": 1e-06, "loss": 0.0729, "step": 150 }, { "clip_ratio/high_max": 0.0024364274431718513, "clip_ratio/high_mean": 0.0009684265933174174, "clip_ratio/low_mean": 0.0011742259666789323, "clip_ratio/low_min": 5.952266747044632e-05, "clip_ratio/region_mean": 0.002142652556358371, "epoch": 0.014093298100088083, "grad_norm": 0.11541232466697693, "learning_rate": 1e-06, "loss": 0.0216, "step": 151 }, { "clip_ratio/high_max": 0.0019661002079374157, "clip_ratio/high_mean": 0.0008246558172686491, "clip_ratio/low_mean": 0.0011414500077080447, "clip_ratio/low_min": 0.0001294427815992094, "clip_ratio/region_mean": 0.0019661058468045667, "epoch": 0.014186631200088666, "grad_norm": 0.11190304160118103, "learning_rate": 1e-06, "loss": 0.0752, "step": 152 }, { "clip_ratio/high_max": 0.002053126307146158, "clip_ratio/high_mean": 0.0008752066005399683, "clip_ratio/low_mean": 0.0009766246239450993, "clip_ratio/low_min": 0.00013271671195980161, "clip_ratio/region_mean": 0.0018518312062951736, "epoch": 0.01427996430008925, "grad_norm": 0.10394339263439178, "learning_rate": 1e-06, "loss": 0.0293, "step": 153 }, { "clip_ratio/high_max": 0.001958266308065504, "clip_ratio/high_mean": 0.000847771805638331, "clip_ratio/low_mean": 0.0010144932093680836, "clip_ratio/low_min": 0.00012687309117609402, "clip_ratio/region_mean": 0.0018622650459292345, "epoch": 0.014373297400089833, "grad_norm": 0.10715346038341522, "learning_rate": 1e-06, "loss": 0.0282, "step": 154 }, { "clip_ratio/high_max": 0.0023149186963564716, "clip_ratio/high_mean": 0.0009205482856486924, "clip_ratio/low_mean": 0.0009852970979409292, "clip_ratio/low_min": 9.318105003330857e-05, "clip_ratio/region_mean": 0.0019058454272453673, "epoch": 0.014466630500090416, "grad_norm": 0.1153310164809227, "learning_rate": 1e-06, "loss": 0.0515, "step": 155 }, { "clip_ratio/high_max": 0.0022802790699643083, "clip_ratio/high_mean": 0.0009375233603350352, "clip_ratio/low_mean": 0.001135696209530579, "clip_ratio/low_min": 0.00018112708949047374, "clip_ratio/region_mean": 0.0020732195480377413, "epoch": 0.014559963600091, "grad_norm": 0.11021652817726135, "learning_rate": 1e-06, "loss": 0.0492, "step": 156 }, { "clip_ratio/high_max": 0.002113158334395848, "clip_ratio/high_mean": 0.000859156700244057, "clip_ratio/low_mean": 0.0009488925807090709, "clip_ratio/low_min": 9.812972530198749e-05, "clip_ratio/region_mean": 0.0018080492300214246, "epoch": 0.014653296700091583, "grad_norm": 0.1025896891951561, "learning_rate": 1e-06, "loss": 0.0144, "step": 157 }, { "clip_ratio/high_max": 0.002496098000847269, "clip_ratio/high_mean": 0.0010491368957445957, "clip_ratio/low_mean": 0.0008757588693697471, "clip_ratio/low_min": 5.840846279170364e-05, "clip_ratio/region_mean": 0.0019248957396484911, "epoch": 0.014746629800092167, "grad_norm": 0.11381614953279495, "learning_rate": 1e-06, "loss": 0.0099, "step": 158 }, { "clip_ratio/high_max": 0.002546114192227833, "clip_ratio/high_mean": 0.0009751469042384997, "clip_ratio/low_mean": 0.0010020688314398285, "clip_ratio/low_min": 0.00012872707065980649, "clip_ratio/region_mean": 0.001977215739316307, "epoch": 0.01483996290009275, "grad_norm": 0.1121005266904831, "learning_rate": 1e-06, "loss": 0.061, "step": 159 }, { "clip_ratio/high_max": 0.002150193067791406, "clip_ratio/high_mean": 0.0008916723272704985, "clip_ratio/low_mean": 0.0010205037033301778, "clip_ratio/low_min": 0.00013426712666841922, "clip_ratio/region_mean": 0.0019121760051348247, "epoch": 0.014933296000093334, "grad_norm": 0.12039100378751755, "learning_rate": 1e-06, "loss": 0.0238, "step": 160 }, { "clip_ratio/high_max": 0.002424445243377704, "clip_ratio/high_mean": 0.0010037648717116099, "clip_ratio/low_mean": 0.0010338006359233987, "clip_ratio/low_min": 7.64307069403003e-05, "clip_ratio/region_mean": 0.0020375655221869238, "epoch": 0.015026629100093917, "grad_norm": 0.11659151315689087, "learning_rate": 1e-06, "loss": -0.0014, "step": 161 }, { "clip_ratio/high_max": 0.0021807785524288192, "clip_ratio/high_mean": 0.0008761026565480279, "clip_ratio/low_mean": 0.0010180029348703101, "clip_ratio/low_min": 0.00018196645305579295, "clip_ratio/region_mean": 0.001894105582323391, "epoch": 0.015119962200094499, "grad_norm": 0.11176208406686783, "learning_rate": 1e-06, "loss": 0.0589, "step": 162 }, { "clip_ratio/high_max": 0.001838619944464881, "clip_ratio/high_mean": 0.0008191784290829673, "clip_ratio/low_mean": 0.0009176180592476157, "clip_ratio/low_min": 5.925031837250572e-05, "clip_ratio/region_mean": 0.0017367964755976573, "epoch": 0.015213295300095082, "grad_norm": 0.10197355598211288, "learning_rate": 1e-06, "loss": 0.0355, "step": 163 }, { "clip_ratio/high_max": 0.002138756404747255, "clip_ratio/high_mean": 0.0008771116281423019, "clip_ratio/low_mean": 0.001067933022568468, "clip_ratio/low_min": 8.830692240735516e-05, "clip_ratio/region_mean": 0.0019450446707196534, "epoch": 0.015306628400095666, "grad_norm": 0.26354193687438965, "learning_rate": 1e-06, "loss": 0.057, "step": 164 }, { "clip_ratio/high_max": 0.0022207497313502245, "clip_ratio/high_mean": 0.000880809702721308, "clip_ratio/low_mean": 0.0010350446245865896, "clip_ratio/low_min": 0.00012348507243586937, "clip_ratio/region_mean": 0.001915854329126887, "epoch": 0.01539996150009625, "grad_norm": 0.10693710297346115, "learning_rate": 1e-06, "loss": 0.0462, "step": 165 }, { "clip_ratio/high_max": 0.002169806361052906, "clip_ratio/high_mean": 0.0008588912296545459, "clip_ratio/low_mean": 0.0009284107727580704, "clip_ratio/low_min": 9.478376341576222e-05, "clip_ratio/region_mean": 0.0017873020042316057, "epoch": 0.015493294600096833, "grad_norm": 0.10736197978258133, "learning_rate": 1e-06, "loss": 0.037, "step": 166 }, { "clip_ratio/high_max": 0.0023127159729483537, "clip_ratio/high_mean": 0.0009983401723729912, "clip_ratio/low_mean": 0.0009717591565276962, "clip_ratio/low_min": 0.00011505640941322781, "clip_ratio/region_mean": 0.0019700992852449417, "epoch": 0.015586627700097416, "grad_norm": 0.1165332943201065, "learning_rate": 1e-06, "loss": 0.0066, "step": 167 }, { "clip_ratio/high_max": 0.002206595156167168, "clip_ratio/high_mean": 0.0009195062084472738, "clip_ratio/low_mean": 0.0011236162354180124, "clip_ratio/low_min": 0.00011721985902113374, "clip_ratio/region_mean": 0.002043122469331138, "epoch": 0.015679960800098, "grad_norm": 0.11286017298698425, "learning_rate": 1e-06, "loss": 0.0355, "step": 168 }, { "clip_ratio/high_max": 0.0020838089440076146, "clip_ratio/high_mean": 0.0009937254653777927, "clip_ratio/low_mean": 0.0010102739634021418, "clip_ratio/low_min": 0.00010326492883905303, "clip_ratio/region_mean": 0.002003999368753284, "epoch": 0.015773293900098583, "grad_norm": 0.12594398856163025, "learning_rate": 1e-06, "loss": 0.0394, "step": 169 }, { "clip_ratio/high_max": 0.002377126300416421, "clip_ratio/high_mean": 0.0009301012614741921, "clip_ratio/low_mean": 0.0009329656841146061, "clip_ratio/low_min": 0.00011017421456926968, "clip_ratio/region_mean": 0.0018630669655976817, "epoch": 0.015866627000099165, "grad_norm": 0.11675780266523361, "learning_rate": 1e-06, "loss": -0.0, "step": 170 }, { "clip_ratio/high_max": 0.0020867814382654615, "clip_ratio/high_mean": 0.0008868259756127372, "clip_ratio/low_mean": 0.0010179107489420858, "clip_ratio/low_min": 0.0002070386108243838, "clip_ratio/region_mean": 0.0019047367459279485, "epoch": 0.01595996010009975, "grad_norm": 0.12032227963209152, "learning_rate": 1e-06, "loss": 0.0117, "step": 171 }, { "clip_ratio/high_max": 0.001971932768356055, "clip_ratio/high_mean": 0.0009522500204184325, "clip_ratio/low_mean": 0.0009487866918789223, "clip_ratio/low_min": 4.6684855078638066e-05, "clip_ratio/region_mean": 0.0019010366668226197, "epoch": 0.016053293200100332, "grad_norm": 0.11389598250389099, "learning_rate": 1e-06, "loss": 0.0139, "step": 172 }, { "clip_ratio/high_max": 0.002201709765358828, "clip_ratio/high_mean": 0.0008747073097765679, "clip_ratio/low_mean": 0.0011275410688540433, "clip_ratio/low_min": 0.0001378677898173919, "clip_ratio/region_mean": 0.0020022483586217277, "epoch": 0.016146626300100917, "grad_norm": 0.11421126872301102, "learning_rate": 1e-06, "loss": 0.0531, "step": 173 }, { "clip_ratio/high_max": 0.0022835715499240905, "clip_ratio/high_mean": 0.0009154123854386853, "clip_ratio/low_mean": 0.0008950499250204302, "clip_ratio/low_min": 5.714001599699259e-05, "clip_ratio/region_mean": 0.0018104623231920414, "epoch": 0.0162399594001015, "grad_norm": 0.12231060117483139, "learning_rate": 1e-06, "loss": 0.0477, "step": 174 }, { "clip_ratio/high_max": 0.002472468069754541, "clip_ratio/high_mean": 0.0010857721354113892, "clip_ratio/low_mean": 0.0010049748325400287, "clip_ratio/low_min": 2.7149178094987292e-05, "clip_ratio/region_mean": 0.0020907469479425345, "epoch": 0.016333292500102084, "grad_norm": 0.10905659943819046, "learning_rate": 1e-06, "loss": -0.0006, "step": 175 }, { "clip_ratio/high_max": 0.0022082540308474563, "clip_ratio/high_mean": 0.0008410517675656592, "clip_ratio/low_mean": 0.0009752658534125658, "clip_ratio/low_min": 0.00016588959351793164, "clip_ratio/region_mean": 0.001816317620978225, "epoch": 0.016426625600102666, "grad_norm": 0.10905282944440842, "learning_rate": 1e-06, "loss": 0.0108, "step": 176 }, { "clip_ratio/high_max": 0.002118591142789228, "clip_ratio/high_mean": 0.0009659828519943403, "clip_ratio/low_mean": 0.001117067993618548, "clip_ratio/low_min": 0.00015997291211533593, "clip_ratio/region_mean": 0.0020830508656217717, "epoch": 0.01651995870010325, "grad_norm": 0.10279494524002075, "learning_rate": 1e-06, "loss": 0.0483, "step": 177 }, { "clip_ratio/high_max": 0.0023061638967192266, "clip_ratio/high_mean": 0.00094044086017675, "clip_ratio/low_mean": 0.0011471924299257807, "clip_ratio/low_min": 7.573713992314879e-05, "clip_ratio/region_mean": 0.002087633343762718, "epoch": 0.016613291800103833, "grad_norm": 0.11682261526584625, "learning_rate": 1e-06, "loss": 0.0526, "step": 178 }, { "clip_ratio/high_max": 0.0019308992159494665, "clip_ratio/high_mean": 0.0008092739044514019, "clip_ratio/low_mean": 0.0009166926683974452, "clip_ratio/low_min": 0.00011727929722837871, "clip_ratio/region_mean": 0.0017259665910387412, "epoch": 0.016706624900104418, "grad_norm": 0.11142003536224365, "learning_rate": 1e-06, "loss": 0.019, "step": 179 }, { "clip_ratio/high_max": 0.002123392972862348, "clip_ratio/high_mean": 0.00096100414157263, "clip_ratio/low_mean": 0.0010481630997674074, "clip_ratio/low_min": 2.7603220587479882e-05, "clip_ratio/region_mean": 0.002009167248615995, "epoch": 0.016799958000105, "grad_norm": 0.10944481939077377, "learning_rate": 1e-06, "loss": 0.0443, "step": 180 }, { "clip_ratio/high_max": 0.0022595435148105025, "clip_ratio/high_mean": 0.0009288617347920081, "clip_ratio/low_mean": 0.0014279976403486216, "clip_ratio/low_min": 0.00011325609193590935, "clip_ratio/region_mean": 0.0023568593387608416, "epoch": 0.016893291100105585, "grad_norm": 0.1177840381860733, "learning_rate": 1e-06, "loss": 0.0605, "step": 181 }, { "clip_ratio/high_max": 0.002379469027800951, "clip_ratio/high_mean": 0.0010155923882848583, "clip_ratio/low_mean": 0.0010046198694908526, "clip_ratio/low_min": 8.140969748637872e-05, "clip_ratio/region_mean": 0.0020202122250339016, "epoch": 0.016986624200106167, "grad_norm": 0.1077289804816246, "learning_rate": 1e-06, "loss": -0.0078, "step": 182 }, { "clip_ratio/high_max": 0.002302628017787356, "clip_ratio/high_mean": 0.0008849685782479355, "clip_ratio/low_mean": 0.00109211862218217, "clip_ratio/low_min": 0.00016074363338702824, "clip_ratio/region_mean": 0.0019770871367654763, "epoch": 0.017079957300106748, "grad_norm": 0.2381676435470581, "learning_rate": 1e-06, "loss": 0.0438, "step": 183 }, { "clip_ratio/high_max": 0.0019240915207774378, "clip_ratio/high_mean": 0.0008243355241575046, "clip_ratio/low_mean": 0.0009939919491444016, "clip_ratio/low_min": 3.820173333224375e-05, "clip_ratio/region_mean": 0.0018183274114562664, "epoch": 0.017173290400107333, "grad_norm": 0.12016787379980087, "learning_rate": 1e-06, "loss": 0.0602, "step": 184 }, { "clip_ratio/high_max": 0.0024942127893154975, "clip_ratio/high_mean": 0.0010485043276275974, "clip_ratio/low_mean": 0.0009196217333737877, "clip_ratio/low_min": 9.459688953938894e-05, "clip_ratio/region_mean": 0.0019681260600918904, "epoch": 0.017266623500107915, "grad_norm": 0.09575652331113815, "learning_rate": 1e-06, "loss": -0.0141, "step": 185 }, { "clip_ratio/high_max": 0.002212736442743335, "clip_ratio/high_mean": 0.0009334607493656222, "clip_ratio/low_mean": 0.0010552561016083928, "clip_ratio/low_min": 6.551646765728947e-05, "clip_ratio/region_mean": 0.0019887168455170467, "epoch": 0.0173599566001085, "grad_norm": 0.10698789358139038, "learning_rate": 1e-06, "loss": 0.0436, "step": 186 }, { "clip_ratio/high_max": 0.0022086959615990054, "clip_ratio/high_mean": 0.0009477929706918076, "clip_ratio/low_mean": 0.0010155749187106267, "clip_ratio/low_min": 0.00011371195432730019, "clip_ratio/region_mean": 0.0019633678675745614, "epoch": 0.017453289700109082, "grad_norm": 0.11505325883626938, "learning_rate": 1e-06, "loss": 0.0456, "step": 187 }, { "clip_ratio/high_max": 0.002223488365416415, "clip_ratio/high_mean": 0.0009709603546070866, "clip_ratio/low_mean": 0.0009627137533243513, "clip_ratio/low_min": 5.509776292456081e-05, "clip_ratio/region_mean": 0.001933674058818724, "epoch": 0.017546622800109667, "grad_norm": 0.1694178283214569, "learning_rate": 1e-06, "loss": 0.0172, "step": 188 }, { "clip_ratio/high_max": 0.0022287796964519657, "clip_ratio/high_mean": 0.0009651735381339677, "clip_ratio/low_mean": 0.0009683517419034615, "clip_ratio/low_min": 4.17794562963536e-05, "clip_ratio/region_mean": 0.0019335253236931749, "epoch": 0.01763995590011025, "grad_norm": 0.11302310973405838, "learning_rate": 1e-06, "loss": 0.0375, "step": 189 }, { "clip_ratio/high_max": 0.0021402684069471434, "clip_ratio/high_mean": 0.0008638818180770613, "clip_ratio/low_mean": 0.0008136473952617962, "clip_ratio/low_min": 3.57307362719439e-05, "clip_ratio/region_mean": 0.0016775292278907727, "epoch": 0.017733289000110834, "grad_norm": 0.11362725496292114, "learning_rate": 1e-06, "loss": 0.0204, "step": 190 }, { "clip_ratio/high_max": 0.0021006721726735123, "clip_ratio/high_mean": 0.0009117627378145698, "clip_ratio/low_mean": 0.0009677176021796186, "clip_ratio/low_min": 4.301445369492285e-05, "clip_ratio/region_mean": 0.0018794803945638705, "epoch": 0.017826622100111416, "grad_norm": 0.11704517155885696, "learning_rate": 1e-06, "loss": 0.0209, "step": 191 }, { "clip_ratio/high_max": 0.001939426289027324, "clip_ratio/high_mean": 0.0009004323474073317, "clip_ratio/low_mean": 0.0009891694389807526, "clip_ratio/low_min": 0.0001037706706483732, "clip_ratio/region_mean": 0.0018896018009399995, "epoch": 0.017919955200112, "grad_norm": 0.107717365026474, "learning_rate": 1e-06, "loss": 0.0227, "step": 192 }, { "clip_ratio/high_max": 0.002167451471905224, "clip_ratio/high_mean": 0.0007966815464897081, "clip_ratio/low_mean": 0.0011213892066734843, "clip_ratio/low_min": 5.822916318720672e-05, "clip_ratio/region_mean": 0.0019180707895429805, "epoch": 0.018013288300112583, "grad_norm": 0.11653489619493484, "learning_rate": 1e-06, "loss": 0.0764, "step": 193 }, { "clip_ratio/high_max": 0.0020121706256759353, "clip_ratio/high_mean": 0.0008923529694584431, "clip_ratio/low_mean": 0.0011409554972487967, "clip_ratio/low_min": 0.00020639755166484974, "clip_ratio/region_mean": 0.002033308512181975, "epoch": 0.018106621400113165, "grad_norm": 0.10933557897806168, "learning_rate": 1e-06, "loss": 0.03, "step": 194 }, { "clip_ratio/high_max": 0.001967484855413204, "clip_ratio/high_mean": 0.0008534214885003166, "clip_ratio/low_mean": 0.0010785847280203598, "clip_ratio/low_min": 3.398756052774843e-05, "clip_ratio/region_mean": 0.0019320062710903585, "epoch": 0.01819995450011375, "grad_norm": 0.11805937439203262, "learning_rate": 1e-06, "loss": 0.0371, "step": 195 }, { "clip_ratio/high_max": 0.002049378599622287, "clip_ratio/high_mean": 0.0009569281064614188, "clip_ratio/low_mean": 0.0009143788884102833, "clip_ratio/low_min": 8.688524758326821e-05, "clip_ratio/region_mean": 0.0018713069512159564, "epoch": 0.01829328760011433, "grad_norm": 0.10525800287723541, "learning_rate": 1e-06, "loss": -0.0058, "step": 196 }, { "clip_ratio/high_max": 0.0021906488509557676, "clip_ratio/high_mean": 0.0009428213979845168, "clip_ratio/low_mean": 0.0010363991423218977, "clip_ratio/low_min": 0.00011028917106159497, "clip_ratio/region_mean": 0.0019792204911937006, "epoch": 0.018386620700114917, "grad_norm": 0.11151842772960663, "learning_rate": 1e-06, "loss": 0.0296, "step": 197 }, { "clip_ratio/high_max": 0.0018811374466167763, "clip_ratio/high_mean": 0.0008195067366614239, "clip_ratio/low_mean": 0.0008978890673461137, "clip_ratio/low_min": 6.23137839284027e-05, "clip_ratio/region_mean": 0.0017173958331113681, "epoch": 0.0184799538001155, "grad_norm": 0.10290457308292389, "learning_rate": 1e-06, "loss": 0.0346, "step": 198 }, { "clip_ratio/high_max": 0.00213869426806923, "clip_ratio/high_mean": 0.0008587574375269469, "clip_ratio/low_mean": 0.0010173071113968035, "clip_ratio/low_min": 7.005389124969952e-05, "clip_ratio/region_mean": 0.0018760645689326338, "epoch": 0.018573286900116084, "grad_norm": 0.10898451507091522, "learning_rate": 1e-06, "loss": 0.0355, "step": 199 }, { "clip_ratio/high_max": 0.0020946437398379203, "clip_ratio/high_mean": 0.0008866869247867726, "clip_ratio/low_mean": 0.0011017338583769742, "clip_ratio/low_min": 0.00014374458260135725, "clip_ratio/region_mean": 0.0019884207722498104, "epoch": 0.018666620000116665, "grad_norm": 0.11538136005401611, "learning_rate": 1e-06, "loss": -0.0077, "step": 200 }, { "clip_ratio/high_max": 0.0021541278692893684, "clip_ratio/high_mean": 0.0009477971761953086, "clip_ratio/low_mean": 0.0010456737400090788, "clip_ratio/low_min": 7.926578655315097e-05, "clip_ratio/region_mean": 0.0019934709125664085, "epoch": 0.01875995310011725, "grad_norm": 0.1162070706486702, "learning_rate": 1e-06, "loss": 0.0244, "step": 201 }, { "clip_ratio/high_max": 0.0023056704776536208, "clip_ratio/high_mean": 0.0010438414428790566, "clip_ratio/low_mean": 0.0011379790121281985, "clip_ratio/low_min": 0.00012575384744195617, "clip_ratio/region_mean": 0.0021818204695591703, "epoch": 0.018853286200117832, "grad_norm": 0.11591261625289917, "learning_rate": 1e-06, "loss": 0.0308, "step": 202 }, { "clip_ratio/high_max": 0.0020941249531460926, "clip_ratio/high_mean": 0.0008581130496168043, "clip_ratio/low_mean": 0.0009921733508235775, "clip_ratio/low_min": 0.00011524317960720509, "clip_ratio/region_mean": 0.0018502863749745302, "epoch": 0.018946619300118418, "grad_norm": 0.10963311046361923, "learning_rate": 1e-06, "loss": 0.0724, "step": 203 }, { "clip_ratio/high_max": 0.0022479714098153636, "clip_ratio/high_mean": 0.0010425368418509606, "clip_ratio/low_mean": 0.0009532710992061766, "clip_ratio/low_min": 8.883651571522932e-05, "clip_ratio/region_mean": 0.001995807957428042, "epoch": 0.019039952400119, "grad_norm": 0.10802527517080307, "learning_rate": 1e-06, "loss": 0.0092, "step": 204 }, { "clip_ratio/high_max": 0.002085646046907641, "clip_ratio/high_mean": 0.000918427813303424, "clip_ratio/low_mean": 0.0010653214121703058, "clip_ratio/low_min": 0.00010971641950163757, "clip_ratio/region_mean": 0.0019837492363876663, "epoch": 0.019133285500119585, "grad_norm": 0.11404747515916824, "learning_rate": 1e-06, "loss": -0.0062, "step": 205 }, { "clip_ratio/high_max": 0.0021899579296587035, "clip_ratio/high_mean": 0.0009628759780753171, "clip_ratio/low_mean": 0.001122700450650882, "clip_ratio/low_min": 6.675414169876603e-05, "clip_ratio/region_mean": 0.002085576386889443, "epoch": 0.019226618600120166, "grad_norm": 0.107424296438694, "learning_rate": 1e-06, "loss": 0.02, "step": 206 }, { "clip_ratio/high_max": 0.0023592188117618207, "clip_ratio/high_mean": 0.0009842861445577, "clip_ratio/low_mean": 0.0009870921130641364, "clip_ratio/low_min": 0.0001591050222486956, "clip_ratio/region_mean": 0.0019713782603503205, "epoch": 0.019319951700120748, "grad_norm": 0.12021970003843307, "learning_rate": 1e-06, "loss": -0.017, "step": 207 }, { "clip_ratio/high_max": 0.0020937302251695655, "clip_ratio/high_mean": 0.0010327574600523803, "clip_ratio/low_mean": 0.000963145883360994, "clip_ratio/low_min": 0.00011117216126876883, "clip_ratio/region_mean": 0.0019959033306804486, "epoch": 0.019413284800121333, "grad_norm": 0.12263026833534241, "learning_rate": 1e-06, "loss": 0.0358, "step": 208 }, { "clip_ratio/high_max": 0.0018762995568977203, "clip_ratio/high_mean": 0.0008226645895774709, "clip_ratio/low_mean": 0.00111507356268703, "clip_ratio/low_min": 6.65598063278594e-05, "clip_ratio/region_mean": 0.0019377381104277447, "epoch": 0.019506617900121915, "grad_norm": 0.10565012693405151, "learning_rate": 1e-06, "loss": 0.0524, "step": 209 }, { "clip_ratio/high_max": 0.002638690668391064, "clip_ratio/high_mean": 0.001115183131332742, "clip_ratio/low_mean": 0.000947213296967675, "clip_ratio/low_min": 8.75063205967308e-05, "clip_ratio/region_mean": 0.0020623964446713217, "epoch": 0.0195999510001225, "grad_norm": 0.12090546637773514, "learning_rate": 1e-06, "loss": -0.0036, "step": 210 }, { "clip_ratio/high_max": 0.002234870982647408, "clip_ratio/high_mean": 0.000933173583689495, "clip_ratio/low_mean": 0.0010040104261861416, "clip_ratio/low_min": 0.00018935407115350245, "clip_ratio/region_mean": 0.001937184002599679, "epoch": 0.019693284100123082, "grad_norm": 0.11095651239156723, "learning_rate": 1e-06, "loss": 0.0193, "step": 211 }, { "clip_ratio/high_max": 0.002367613349633757, "clip_ratio/high_mean": 0.0010650542826624587, "clip_ratio/low_mean": 0.0009907163857860724, "clip_ratio/low_min": 1.414347116224235e-05, "clip_ratio/region_mean": 0.0020557706302497536, "epoch": 0.019786617200123667, "grad_norm": 0.13546325266361237, "learning_rate": 1e-06, "loss": -0.0164, "step": 212 }, { "clip_ratio/high_max": 0.0019728094557649456, "clip_ratio/high_mean": 0.000914378544621286, "clip_ratio/low_mean": 0.0009974736640288029, "clip_ratio/low_min": 6.0291994486760814e-05, "clip_ratio/region_mean": 0.001911852195917163, "epoch": 0.01987995030012425, "grad_norm": 0.1279425323009491, "learning_rate": 1e-06, "loss": 0.008, "step": 213 }, { "clip_ratio/high_max": 0.0023448467436537612, "clip_ratio/high_mean": 0.0010103245003847405, "clip_ratio/low_mean": 0.0010073525190819055, "clip_ratio/low_min": 0.000172418247530004, "clip_ratio/region_mean": 0.0020176769903628156, "epoch": 0.019973283400124834, "grad_norm": 0.10428622364997864, "learning_rate": 1e-06, "loss": 0.0363, "step": 214 }, { "clip_ratio/high_max": 0.001850732911407249, "clip_ratio/high_mean": 0.0008369260494873743, "clip_ratio/low_mean": 0.001212673098052619, "clip_ratio/low_min": 0.00016832503297337098, "clip_ratio/region_mean": 0.0020495992284850217, "epoch": 0.020066616500125416, "grad_norm": 0.11830378323793411, "learning_rate": 1e-06, "loss": 0.0532, "step": 215 }, { "clip_ratio/high_max": 0.001899744020192884, "clip_ratio/high_mean": 0.0008709589128557127, "clip_ratio/low_mean": 0.0008804627850622637, "clip_ratio/low_min": 4.245876698405482e-05, "clip_ratio/region_mean": 0.0017514217033749446, "epoch": 0.020159949600126, "grad_norm": 0.11459111422300339, "learning_rate": 1e-06, "loss": 0.0289, "step": 216 }, { "clip_ratio/high_max": 0.0017330941154796164, "clip_ratio/high_mean": 0.0007122519618860679, "clip_ratio/low_mean": 0.0011193732025276404, "clip_ratio/low_min": 0.00011346063456585398, "clip_ratio/region_mean": 0.0018316251662326977, "epoch": 0.020253282700126583, "grad_norm": 0.11213616281747818, "learning_rate": 1e-06, "loss": 0.0714, "step": 217 }, { "clip_ratio/high_max": 0.002310619624040555, "clip_ratio/high_mean": 0.0010488324478501454, "clip_ratio/low_mean": 0.0009332481240562629, "clip_ratio/low_min": 9.051858432940207e-05, "clip_ratio/region_mean": 0.001982080502784811, "epoch": 0.020346615800127168, "grad_norm": 0.10472150146961212, "learning_rate": 1e-06, "loss": 0.0103, "step": 218 }, { "clip_ratio/high_max": 0.00214025368768489, "clip_ratio/high_mean": 0.0009075563848455204, "clip_ratio/low_mean": 0.0010840917584573617, "clip_ratio/low_min": 0.0001759737770044012, "clip_ratio/region_mean": 0.001991648146940861, "epoch": 0.02043994890012775, "grad_norm": 0.11451179534196854, "learning_rate": 1e-06, "loss": 0.0232, "step": 219 }, { "clip_ratio/high_max": 0.0019043495485675521, "clip_ratio/high_mean": 0.0008271847873402294, "clip_ratio/low_mean": 0.001076140339137055, "clip_ratio/low_min": 0.0001006030588541762, "clip_ratio/region_mean": 0.0019033251446671784, "epoch": 0.02053328200012833, "grad_norm": 0.11380618810653687, "learning_rate": 1e-06, "loss": 0.0546, "step": 220 }, { "clip_ratio/high_max": 0.0021941617524134926, "clip_ratio/high_mean": 0.0009049884847627254, "clip_ratio/low_mean": 0.0011537418358784635, "clip_ratio/low_min": 8.618706942797871e-05, "clip_ratio/region_mean": 0.002058730271528475, "epoch": 0.020626615100128916, "grad_norm": 0.12104861438274384, "learning_rate": 1e-06, "loss": 0.017, "step": 221 }, { "clip_ratio/high_max": 0.0024534166004741564, "clip_ratio/high_mean": 0.0009078702751139645, "clip_ratio/low_mean": 0.0009813616561586969, "clip_ratio/low_min": 5.7123578699247446e-05, "clip_ratio/region_mean": 0.001889231956738513, "epoch": 0.020719948200129498, "grad_norm": 0.11052046716213226, "learning_rate": 1e-06, "loss": 0.0108, "step": 222 }, { "clip_ratio/high_max": 0.002098733984894352, "clip_ratio/high_mean": 0.000848316016345052, "clip_ratio/low_mean": 0.0010787546780193225, "clip_ratio/low_min": 0.00012500591947173234, "clip_ratio/region_mean": 0.0019270707198302262, "epoch": 0.020813281300130083, "grad_norm": 0.109070785343647, "learning_rate": 1e-06, "loss": 0.0478, "step": 223 }, { "clip_ratio/high_max": 0.0022960092064749915, "clip_ratio/high_mean": 0.00092519794452528, "clip_ratio/low_mean": 0.0009774558420758694, "clip_ratio/low_min": 0.00015045182044559624, "clip_ratio/region_mean": 0.0019026537629542872, "epoch": 0.020906614400130665, "grad_norm": 0.11426587402820587, "learning_rate": 1e-06, "loss": 0.0267, "step": 224 }, { "clip_ratio/high_max": 0.002138144125638064, "clip_ratio/high_mean": 0.0008678825925017009, "clip_ratio/low_mean": 0.0009975082921300782, "clip_ratio/low_min": 5.39730763193802e-05, "clip_ratio/region_mean": 0.0018653908555279486, "epoch": 0.02099994750013125, "grad_norm": 0.10962416231632233, "learning_rate": 1e-06, "loss": 0.053, "step": 225 }, { "clip_ratio/high_max": 0.002490065286110621, "clip_ratio/high_mean": 0.0011171596561325714, "clip_ratio/low_mean": 0.0008788071827439126, "clip_ratio/low_min": 1.9311972209834494e-05, "clip_ratio/region_mean": 0.001995966878894251, "epoch": 0.021093280600131832, "grad_norm": 0.1325608193874359, "learning_rate": 1e-06, "loss": -0.0415, "step": 226 }, { "clip_ratio/high_max": 0.0021953234463580884, "clip_ratio/high_mean": 0.0009879996432573535, "clip_ratio/low_mean": 0.0008496245955029735, "clip_ratio/low_min": 7.815466551619465e-05, "clip_ratio/region_mean": 0.00183762422966538, "epoch": 0.021186613700132417, "grad_norm": 0.11213158816099167, "learning_rate": 1e-06, "loss": 0.0001, "step": 227 }, { "clip_ratio/high_max": 0.0024829938847688027, "clip_ratio/high_mean": 0.000908926005649846, "clip_ratio/low_mean": 0.0010305980467819609, "clip_ratio/low_min": 0.00010798132279887795, "clip_ratio/region_mean": 0.0019395240597077645, "epoch": 0.021279946800133, "grad_norm": 0.11000971496105194, "learning_rate": 1e-06, "loss": 0.0347, "step": 228 }, { "clip_ratio/high_max": 0.002539554610848427, "clip_ratio/high_mean": 0.0010273880143358838, "clip_ratio/low_mean": 0.0011381345757399686, "clip_ratio/low_min": 0.00015615619122399949, "clip_ratio/region_mean": 0.0021655225864378735, "epoch": 0.021373279900133584, "grad_norm": 0.1189996749162674, "learning_rate": 1e-06, "loss": 0.0251, "step": 229 }, { "clip_ratio/high_max": 0.0024407244491158053, "clip_ratio/high_mean": 0.0010192626468779054, "clip_ratio/low_mean": 0.0009032363959704526, "clip_ratio/low_min": 4.907828588329721e-05, "clip_ratio/region_mean": 0.0019224990901420824, "epoch": 0.021466613000134166, "grad_norm": 0.11909337341785431, "learning_rate": 1e-06, "loss": -0.0178, "step": 230 }, { "clip_ratio/high_max": 0.002424677564704325, "clip_ratio/high_mean": 0.0010363195215177257, "clip_ratio/low_mean": 0.0010111930096172728, "clip_ratio/low_min": 5.805942964798305e-05, "clip_ratio/region_mean": 0.0020475125347729772, "epoch": 0.02155994610013475, "grad_norm": 0.11340123414993286, "learning_rate": 1e-06, "loss": 0.0336, "step": 231 }, { "clip_ratio/high_max": 0.0019159656076226383, "clip_ratio/high_mean": 0.0008645427242299775, "clip_ratio/low_mean": 0.0010895783270825632, "clip_ratio/low_min": 7.422575754389982e-05, "clip_ratio/region_mean": 0.0019541210785973817, "epoch": 0.021653279200135333, "grad_norm": 0.11788474768400192, "learning_rate": 1e-06, "loss": 0.0679, "step": 232 }, { "clip_ratio/high_max": 0.0019080695492448285, "clip_ratio/high_mean": 0.0007964155192894395, "clip_ratio/low_mean": 0.0010572892570053227, "clip_ratio/low_min": 0.00017893908352561994, "clip_ratio/region_mean": 0.0018537047653808258, "epoch": 0.021746612300135915, "grad_norm": 0.12439021468162537, "learning_rate": 1e-06, "loss": 0.054, "step": 233 }, { "clip_ratio/high_max": 0.0021464417659444734, "clip_ratio/high_mean": 0.0009470107142988127, "clip_ratio/low_mean": 0.0010860409747692756, "clip_ratio/low_min": 0.00018239432210975792, "clip_ratio/region_mean": 0.002033051656326279, "epoch": 0.0218399454001365, "grad_norm": 0.11479232460260391, "learning_rate": 1e-06, "loss": -0.0018, "step": 234 }, { "clip_ratio/high_max": 0.00214841213892214, "clip_ratio/high_mean": 0.000886670812178636, "clip_ratio/low_mean": 0.0011134746582683874, "clip_ratio/low_min": 0.00018994514721271116, "clip_ratio/region_mean": 0.002000145508645801, "epoch": 0.02193327850013708, "grad_norm": 0.09970298409461975, "learning_rate": 1e-06, "loss": 0.023, "step": 235 }, { "clip_ratio/high_max": 0.002168571067159064, "clip_ratio/high_mean": 0.0008643800611025654, "clip_ratio/low_mean": 0.0010122475841853884, "clip_ratio/low_min": 6.922038573975442e-05, "clip_ratio/region_mean": 0.0018766276334645227, "epoch": 0.022026611600137667, "grad_norm": 0.1070210263133049, "learning_rate": 1e-06, "loss": 0.0178, "step": 236 }, { "clip_ratio/high_max": 0.002073378796922043, "clip_ratio/high_mean": 0.0009625376042095013, "clip_ratio/low_mean": 0.000940343423280865, "clip_ratio/low_min": 2.969238721561851e-05, "clip_ratio/region_mean": 0.0019028810274903663, "epoch": 0.02211994470013825, "grad_norm": 0.13581731915473938, "learning_rate": 1e-06, "loss": 0.0274, "step": 237 }, { "clip_ratio/high_max": 0.002210548824223224, "clip_ratio/high_mean": 0.0008758097883401206, "clip_ratio/low_mean": 0.0011279759019089397, "clip_ratio/low_min": 8.098124817479402e-05, "clip_ratio/region_mean": 0.0020037857102579437, "epoch": 0.022213277800138834, "grad_norm": 0.12062512338161469, "learning_rate": 1e-06, "loss": 0.0426, "step": 238 }, { "clip_ratio/high_max": 0.0022998643544269726, "clip_ratio/high_mean": 0.0009921831151586957, "clip_ratio/low_mean": 0.0010372236283728853, "clip_ratio/low_min": 7.738099884591065e-05, "clip_ratio/region_mean": 0.002029406801739242, "epoch": 0.022306610900139415, "grad_norm": 0.1370217353105545, "learning_rate": 1e-06, "loss": 0.0009, "step": 239 }, { "clip_ratio/high_max": 0.0024262699225801043, "clip_ratio/high_mean": 0.000984808248176705, "clip_ratio/low_mean": 0.0009955452132999199, "clip_ratio/low_min": 9.318272987002274e-05, "clip_ratio/region_mean": 0.001980353452381678, "epoch": 0.02239994400014, "grad_norm": 0.11358516663312912, "learning_rate": 1e-06, "loss": 0.0077, "step": 240 }, { "clip_ratio/high_max": 0.0025296202147728764, "clip_ratio/high_mean": 0.0008934057877922896, "clip_ratio/low_mean": 0.0010643806599546224, "clip_ratio/low_min": 7.650142106285784e-05, "clip_ratio/region_mean": 0.0019577864950406365, "epoch": 0.022493277100140582, "grad_norm": 0.1107291653752327, "learning_rate": 1e-06, "loss": 0.0128, "step": 241 }, { "clip_ratio/high_max": 0.0023834252351662144, "clip_ratio/high_mean": 0.0009954757606465137, "clip_ratio/low_mean": 0.0010605910792946815, "clip_ratio/low_min": 0.00011055981985919061, "clip_ratio/region_mean": 0.002056066870864015, "epoch": 0.022586610200141168, "grad_norm": 0.11257448047399521, "learning_rate": 1e-06, "loss": -0.0146, "step": 242 }, { "clip_ratio/high_max": 0.0021795274733449332, "clip_ratio/high_mean": 0.0009250603907275945, "clip_ratio/low_mean": 0.0010644061894709012, "clip_ratio/low_min": 0.00018681574238144094, "clip_ratio/region_mean": 0.0019894666256732307, "epoch": 0.02267994330014175, "grad_norm": 0.11343173682689667, "learning_rate": 1e-06, "loss": 0.0204, "step": 243 }, { "clip_ratio/high_max": 0.002128443305991823, "clip_ratio/high_mean": 0.0009035770835907897, "clip_ratio/low_mean": 0.0010444948529766407, "clip_ratio/low_min": 0.0001148682404163992, "clip_ratio/region_mean": 0.0019480719056446105, "epoch": 0.022773276400142334, "grad_norm": 0.1172831729054451, "learning_rate": 1e-06, "loss": 0.0257, "step": 244 }, { "clip_ratio/high_max": 0.0021832419151905924, "clip_ratio/high_mean": 0.0009231668518623337, "clip_ratio/low_mean": 0.0011320406265440397, "clip_ratio/low_min": 8.43684620122076e-05, "clip_ratio/region_mean": 0.002055207463854458, "epoch": 0.022866609500142916, "grad_norm": 0.11971227824687958, "learning_rate": 1e-06, "loss": 0.0223, "step": 245 }, { "clip_ratio/high_max": 0.002301872031239327, "clip_ratio/high_mean": 0.0009581696031091269, "clip_ratio/low_mean": 0.00104950022068806, "clip_ratio/low_min": 9.482742461841553e-05, "clip_ratio/region_mean": 0.0020076698347111233, "epoch": 0.022959942600143498, "grad_norm": 0.12075507640838623, "learning_rate": 1e-06, "loss": 0.0184, "step": 246 }, { "clip_ratio/high_max": 0.0023539223766420037, "clip_ratio/high_mean": 0.000887859448994277, "clip_ratio/low_mean": 0.0012119601451558992, "clip_ratio/low_min": 0.00017464127631683368, "clip_ratio/region_mean": 0.0020998196268919855, "epoch": 0.023053275700144083, "grad_norm": 0.10851837694644928, "learning_rate": 1e-06, "loss": 0.0383, "step": 247 }, { "clip_ratio/high_max": 0.0021011630233260803, "clip_ratio/high_mean": 0.0010566969467618037, "clip_ratio/low_mean": 0.0010376392747275531, "clip_ratio/low_min": 8.686814544489607e-05, "clip_ratio/region_mean": 0.002094336217851378, "epoch": 0.023146608800144665, "grad_norm": 0.11641940474510193, "learning_rate": 1e-06, "loss": -0.04, "step": 248 }, { "clip_ratio/high_max": 0.0022874041096656583, "clip_ratio/high_mean": 0.001076203345292015, "clip_ratio/low_mean": 0.0013142316711309832, "clip_ratio/low_min": 0.00020346790370240342, "clip_ratio/region_mean": 0.0023904349873191677, "epoch": 0.02323994190014525, "grad_norm": 0.12583139538764954, "learning_rate": 1e-06, "loss": 0.0693, "step": 249 }, { "clip_ratio/high_max": 0.002026106718403753, "clip_ratio/high_mean": 0.0009205698734149337, "clip_ratio/low_mean": 0.001014294037304353, "clip_ratio/low_min": 9.524939650873421e-05, "clip_ratio/region_mean": 0.0019348639252712019, "epoch": 0.023333275000145832, "grad_norm": 0.1877448707818985, "learning_rate": 1e-06, "loss": -0.0274, "step": 250 }, { "clip_ratio/high_max": 0.0019113739399472252, "clip_ratio/high_mean": 0.0008814228458504658, "clip_ratio/low_mean": 0.0009779076699487632, "clip_ratio/low_min": 3.899715557054151e-05, "clip_ratio/region_mean": 0.00185933054308407, "epoch": 0.023426608100146417, "grad_norm": 0.14381368458271027, "learning_rate": 1e-06, "loss": 0.0178, "step": 251 }, { "clip_ratio/high_max": 0.002252974092698423, "clip_ratio/high_mean": 0.0008434499231952941, "clip_ratio/low_mean": 0.0013243314788269345, "clip_ratio/low_min": 0.00016336069893441163, "clip_ratio/region_mean": 0.002167781407479197, "epoch": 0.023519941200147, "grad_norm": 2.371664524078369, "learning_rate": 1e-06, "loss": 0.0716, "step": 252 }, { "clip_ratio/high_max": 0.002164042853110004, "clip_ratio/high_mean": 0.0009433994600840379, "clip_ratio/low_mean": 0.001225969301231089, "clip_ratio/low_min": 0.00019286684573671664, "clip_ratio/region_mean": 0.0021693687594961375, "epoch": 0.023613274300147584, "grad_norm": 0.10952296108007431, "learning_rate": 1e-06, "loss": 0.0349, "step": 253 }, { "clip_ratio/high_max": 0.002279012966027949, "clip_ratio/high_mean": 0.0010077389742946252, "clip_ratio/low_mean": 0.0011922855592274573, "clip_ratio/low_min": 0.00017263070185435936, "clip_ratio/region_mean": 0.002200024522608146, "epoch": 0.023706607400148166, "grad_norm": 0.12145056575536728, "learning_rate": 1e-06, "loss": 0.0475, "step": 254 }, { "clip_ratio/high_max": 0.002155759902962018, "clip_ratio/high_mean": 0.0009342498997284565, "clip_ratio/low_mean": 0.0009492247372691054, "clip_ratio/low_min": 5.5615576457057614e-05, "clip_ratio/region_mean": 0.0018834746442735195, "epoch": 0.02379994050014875, "grad_norm": 0.10751894861459732, "learning_rate": 1e-06, "loss": -0.0167, "step": 255 }, { "clip_ratio/high_max": 0.0021634652803186327, "clip_ratio/high_mean": 0.0008521262952854158, "clip_ratio/low_mean": 0.0012216700488352217, "clip_ratio/low_min": 4.012518684248789e-05, "clip_ratio/region_mean": 0.002073796364129521, "epoch": 0.023893273600149333, "grad_norm": 0.11659747362136841, "learning_rate": 1e-06, "loss": 0.0252, "step": 256 }, { "clip_ratio/high_max": 0.0023720954268355854, "clip_ratio/high_mean": 0.0009556506665830966, "clip_ratio/low_mean": 0.0009196366409014445, "clip_ratio/low_min": 4.506940604187548e-05, "clip_ratio/region_mean": 0.0018752872856566682, "completions/clipped_ratio": 0.0135498046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 608.2213134765625, "completions/mean_terminated_length": 560.3133544921875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.023986606700149918, "grad_norm": 101.60354614257812, "learning_rate": 1e-06, "loss": 0.2708, "num_tokens": 243481278.0, "reward": 0.5733642578125, "reward_std": 0.19922226667404175, "rewards/simpleverify_reward/mean": 0.5733642578125, "rewards/simpleverify_reward/std": 0.494590699672699, "step": 257 }, { "clip_ratio/high_max": 0.0021538904729823116, "clip_ratio/high_mean": 0.0009296576226915931, "clip_ratio/low_mean": 0.001109331686166115, "clip_ratio/low_min": 9.17857860258664e-05, "clip_ratio/region_mean": 0.0020389893179526553, "epoch": 0.0240799398001505, "grad_norm": 0.11701799184083939, "learning_rate": 1e-06, "loss": 0.0371, "step": 258 }, { "clip_ratio/high_max": 0.0023623750785191078, "clip_ratio/high_mean": 0.0008973419080575695, "clip_ratio/low_mean": 0.000874067680342705, "clip_ratio/low_min": 4.655217890103813e-05, "clip_ratio/region_mean": 0.0017714095920382533, "epoch": 0.02417327290015108, "grad_norm": 0.23635396361351013, "learning_rate": 1e-06, "loss": 0.0207, "step": 259 }, { "clip_ratio/high_max": 0.001980001041374635, "clip_ratio/high_mean": 0.0008090350147540448, "clip_ratio/low_mean": 0.0009179293028864777, "clip_ratio/low_min": 8.68928373165545e-05, "clip_ratio/region_mean": 0.001726964306726586, "epoch": 0.024266606000151666, "grad_norm": 0.09978413581848145, "learning_rate": 1e-06, "loss": 0.0265, "step": 260 }, { "clip_ratio/high_max": 0.0020471981188165955, "clip_ratio/high_mean": 0.0008724212530069053, "clip_ratio/low_mean": 0.0009256584289687453, "clip_ratio/low_min": 7.888262553024106e-05, "clip_ratio/region_mean": 0.00179807968379464, "epoch": 0.024359939100152248, "grad_norm": 1.4574960470199585, "learning_rate": 1e-06, "loss": 0.0269, "step": 261 }, { "clip_ratio/high_max": 0.0023386417669826187, "clip_ratio/high_mean": 0.0010859356661967468, "clip_ratio/low_mean": 0.0008676300567458384, "clip_ratio/low_min": 3.0226190574467182e-05, "clip_ratio/region_mean": 0.001953565741132479, "epoch": 0.024453272200152833, "grad_norm": 0.11507795751094818, "learning_rate": 1e-06, "loss": -0.0231, "step": 262 }, { "clip_ratio/high_max": 0.0021133939881110564, "clip_ratio/high_mean": 0.0009408792211615946, "clip_ratio/low_mean": 0.0010678440903575392, "clip_ratio/low_min": 0.00013820266303810058, "clip_ratio/region_mean": 0.002008723356993869, "epoch": 0.024546605300153415, "grad_norm": 0.12888933718204498, "learning_rate": 1e-06, "loss": 0.0651, "step": 263 }, { "clip_ratio/high_max": 0.0019742484982998576, "clip_ratio/high_mean": 0.0008053209021454677, "clip_ratio/low_mean": 0.0009693596948636696, "clip_ratio/low_min": 0.0001142224846262252, "clip_ratio/region_mean": 0.0017746805606293492, "epoch": 0.024639938400154, "grad_norm": 211872.734375, "learning_rate": 1e-06, "loss": 1107.0792, "step": 264 }, { "clip_ratio/high_max": 0.0025999167046393268, "clip_ratio/high_mean": 0.0009686993944342248, "clip_ratio/low_mean": 0.00107698624196928, "clip_ratio/low_min": 0.00020360700364108197, "clip_ratio/region_mean": 0.002045685654593399, "epoch": 0.024733271500154582, "grad_norm": 0.12191295623779297, "learning_rate": 1e-06, "loss": 0.0458, "step": 265 }, { "clip_ratio/high_max": 0.0022056036577851046, "clip_ratio/high_mean": 0.0009684755659691291, "clip_ratio/low_mean": 0.0010323439673811663, "clip_ratio/low_min": 0.0001548218560856185, "clip_ratio/region_mean": 0.0020008195060654543, "epoch": 0.024826604600155167, "grad_norm": 0.1116814836859703, "learning_rate": 1e-06, "loss": 0.0458, "step": 266 }, { "clip_ratio/high_max": 0.00264313152001705, "clip_ratio/high_mean": 0.0011148343583045062, "clip_ratio/low_mean": 0.0009596938398317434, "clip_ratio/low_min": 6.548561032104772e-05, "clip_ratio/region_mean": 0.002074528230878059, "epoch": 0.02491993770015575, "grad_norm": 0.1257859766483307, "learning_rate": 1e-06, "loss": -0.002, "step": 267 }, { "clip_ratio/high_max": 0.0017358685945509933, "clip_ratio/high_mean": 0.0007508427897846559, "clip_ratio/low_mean": 0.0009057802744791843, "clip_ratio/low_min": 8.455717943434138e-05, "clip_ratio/region_mean": 0.0016566230624448508, "epoch": 0.025013270800156334, "grad_norm": 25.43267250061035, "learning_rate": 1e-06, "loss": 0.048, "step": 268 }, { "clip_ratio/high_max": 0.0020468689435801934, "clip_ratio/high_mean": 0.0008374350272788433, "clip_ratio/low_mean": 0.0010345795744797215, "clip_ratio/low_min": 2.3317839804803953e-05, "clip_ratio/region_mean": 0.0018720146035775542, "epoch": 0.025106603900156916, "grad_norm": 2.809274196624756, "learning_rate": 1e-06, "loss": 0.0505, "step": 269 }, { "clip_ratio/high_max": 0.00194508030108409, "clip_ratio/high_mean": 0.000816213167126989, "clip_ratio/low_mean": 0.0011209035765205044, "clip_ratio/low_min": 9.659905026637716e-05, "clip_ratio/region_mean": 0.001937116787303239, "epoch": 0.0251999370001575, "grad_norm": 0.10319977253675461, "learning_rate": 1e-06, "loss": 0.0301, "step": 270 }, { "clip_ratio/high_max": 0.002447914346703328, "clip_ratio/high_mean": 0.0009979642291000346, "clip_ratio/low_mean": 0.0009408927089680219, "clip_ratio/low_min": 9.07915564312134e-05, "clip_ratio/region_mean": 0.0019388569926377386, "epoch": 0.025293270100158083, "grad_norm": 0.1134476289153099, "learning_rate": 1e-06, "loss": 0.0333, "step": 271 }, { "clip_ratio/high_max": 0.0024025061647989787, "clip_ratio/high_mean": 0.0010154991687159054, "clip_ratio/low_mean": 0.0010577546345302835, "clip_ratio/low_min": 0.00011584590356505942, "clip_ratio/region_mean": 0.0020732537595904432, "epoch": 0.025386603200158665, "grad_norm": 0.10023599117994308, "learning_rate": 1e-06, "loss": 0.0109, "step": 272 }, { "clip_ratio/high_max": 0.0022039792711439077, "clip_ratio/high_mean": 0.0008787852093519177, "clip_ratio/low_mean": 0.0013139429429429583, "clip_ratio/low_min": 0.000118511903565377, "clip_ratio/region_mean": 0.002192728134104982, "epoch": 0.02547993630015925, "grad_norm": 0.11383773386478424, "learning_rate": 1e-06, "loss": 0.0634, "step": 273 }, { "clip_ratio/high_max": 0.002123242535162717, "clip_ratio/high_mean": 0.0010181207035202533, "clip_ratio/low_mean": 0.0008940824391174829, "clip_ratio/low_min": 6.807116005802527e-05, "clip_ratio/region_mean": 0.0019122031080769375, "epoch": 0.02557326940015983, "grad_norm": 0.11560788750648499, "learning_rate": 1e-06, "loss": -0.0254, "step": 274 }, { "clip_ratio/high_max": 0.0022261288759182207, "clip_ratio/high_mean": 0.0008659750201331917, "clip_ratio/low_mean": 0.0011666952996165492, "clip_ratio/low_min": 8.28755564725725e-05, "clip_ratio/region_mean": 0.0020326703634054866, "epoch": 0.025666602500160417, "grad_norm": 0.11708521842956543, "learning_rate": 1e-06, "loss": 0.0385, "step": 275 }, { "clip_ratio/high_max": 0.002127394240233116, "clip_ratio/high_mean": 0.0009219192579621449, "clip_ratio/low_mean": 0.0011407461788621731, "clip_ratio/low_min": 1.685346069280058e-05, "clip_ratio/region_mean": 0.0020626654659281485, "epoch": 0.025759935600161, "grad_norm": 0.12133800238370895, "learning_rate": 1e-06, "loss": 0.0277, "step": 276 }, { "clip_ratio/high_max": 0.002553348444052972, "clip_ratio/high_mean": 0.0010130150349141331, "clip_ratio/low_mean": 0.0009643308694649022, "clip_ratio/low_min": 6.193570607138099e-05, "clip_ratio/region_mean": 0.0019773459280258976, "epoch": 0.025853268700161584, "grad_norm": 0.11382672190666199, "learning_rate": 1e-06, "loss": -0.0166, "step": 277 }, { "clip_ratio/high_max": 0.002261892528622411, "clip_ratio/high_mean": 0.0009346344340883661, "clip_ratio/low_mean": 0.0011951681663049385, "clip_ratio/low_min": 0.00014592427578463685, "clip_ratio/region_mean": 0.002129802574927453, "epoch": 0.025946601800162165, "grad_norm": 0.12542285025119781, "learning_rate": 1e-06, "loss": 0.026, "step": 278 }, { "clip_ratio/high_max": 0.0022741039319953416, "clip_ratio/high_mean": 0.0008146316813508747, "clip_ratio/low_mean": 0.0011442370578151895, "clip_ratio/low_min": 0.0001342641453447868, "clip_ratio/region_mean": 0.00195886872097617, "epoch": 0.02603993490016275, "grad_norm": 0.12644465267658234, "learning_rate": 1e-06, "loss": 0.0815, "step": 279 }, { "clip_ratio/high_max": 0.0018161654734285548, "clip_ratio/high_mean": 0.0008414362273470033, "clip_ratio/low_mean": 0.001065722040948458, "clip_ratio/low_min": 2.7908015908906236e-05, "clip_ratio/region_mean": 0.0019071582428296097, "epoch": 0.026133268000163332, "grad_norm": 0.12692326307296753, "learning_rate": 1e-06, "loss": 0.0635, "step": 280 }, { "clip_ratio/high_max": 0.002485525685187895, "clip_ratio/high_mean": 0.0009320987992396113, "clip_ratio/low_mean": 0.0010240671635983745, "clip_ratio/low_min": 0.0001568649968248792, "clip_ratio/region_mean": 0.0019561659210012294, "epoch": 0.026226601100163918, "grad_norm": 0.1111765205860138, "learning_rate": 1e-06, "loss": 0.0165, "step": 281 }, { "clip_ratio/high_max": 0.0021119122611708008, "clip_ratio/high_mean": 0.0008515945046383422, "clip_ratio/low_mean": 0.00109626169250987, "clip_ratio/low_min": 7.348210601776373e-05, "clip_ratio/region_mean": 0.0019478561880532652, "epoch": 0.0263199342001645, "grad_norm": 0.11516562849283218, "learning_rate": 1e-06, "loss": 0.0399, "step": 282 }, { "clip_ratio/high_max": 0.0026272412069374695, "clip_ratio/high_mean": 0.0011091487176599912, "clip_ratio/low_mean": 0.001020450086798519, "clip_ratio/low_min": 0.0001115727391152177, "clip_ratio/region_mean": 0.00212959880445851, "epoch": 0.026413267300165084, "grad_norm": 0.1166430339217186, "learning_rate": 1e-06, "loss": -0.0052, "step": 283 }, { "clip_ratio/high_max": 0.0023727270308882, "clip_ratio/high_mean": 0.0010180645549553446, "clip_ratio/low_mean": 0.0011141158065584023, "clip_ratio/low_min": 7.28704562789062e-05, "clip_ratio/region_mean": 0.002132180343323853, "epoch": 0.026506600400165666, "grad_norm": 0.14391209185123444, "learning_rate": 1e-06, "loss": 0.004, "step": 284 }, { "clip_ratio/high_max": 0.0024710543002584018, "clip_ratio/high_mean": 0.0010852410086954478, "clip_ratio/low_mean": 0.0013846052897861227, "clip_ratio/low_min": 0.00015100027667358518, "clip_ratio/region_mean": 0.0024698463748791255, "epoch": 0.026599933500166248, "grad_norm": 0.1591976135969162, "learning_rate": 1e-06, "loss": 0.0048, "step": 285 }, { "clip_ratio/high_max": 0.0022533032097271644, "clip_ratio/high_mean": 0.0009276878372475039, "clip_ratio/low_mean": 0.0011239837731409352, "clip_ratio/low_min": 0.00010055847451440059, "clip_ratio/region_mean": 0.0020516715885605663, "epoch": 0.026693266600166833, "grad_norm": 0.12282620370388031, "learning_rate": 1e-06, "loss": 0.0304, "step": 286 }, { "clip_ratio/high_max": 0.0022062804237066302, "clip_ratio/high_mean": 0.0009301087266067043, "clip_ratio/low_mean": 0.0010107943780894857, "clip_ratio/low_min": 7.176871258707251e-05, "clip_ratio/region_mean": 0.0019409031447139569, "epoch": 0.026786599700167415, "grad_norm": 0.1139829158782959, "learning_rate": 1e-06, "loss": 0.0213, "step": 287 }, { "clip_ratio/high_max": 0.002141230615961831, "clip_ratio/high_mean": 0.0009377644128107931, "clip_ratio/low_mean": 0.0009456081547796202, "clip_ratio/low_min": 8.164348037098534e-05, "clip_ratio/region_mean": 0.0018833725189324468, "epoch": 0.026879932800168, "grad_norm": 0.1083705946803093, "learning_rate": 1e-06, "loss": -0.027, "step": 288 }, { "clip_ratio/high_max": 0.0021596043225144967, "clip_ratio/high_mean": 0.0009366572612634627, "clip_ratio/low_mean": 0.001154423393018078, "clip_ratio/low_min": 0.00017751159339240985, "clip_ratio/region_mean": 0.0020910806706524454, "epoch": 0.026973265900168582, "grad_norm": 0.11221802234649658, "learning_rate": 1e-06, "loss": 0.0296, "step": 289 }, { "clip_ratio/high_max": 0.0023212663727463223, "clip_ratio/high_mean": 0.0008584045190218603, "clip_ratio/low_mean": 0.0012100212079531047, "clip_ratio/low_min": 0.00019799784422502853, "clip_ratio/region_mean": 0.0020684257251559757, "epoch": 0.027066599000169167, "grad_norm": 0.11031017452478409, "learning_rate": 1e-06, "loss": 0.0482, "step": 290 }, { "clip_ratio/high_max": 0.0018162807318731211, "clip_ratio/high_mean": 0.0008221095104090637, "clip_ratio/low_mean": 0.0011979391438217135, "clip_ratio/low_min": 8.421747315878747e-05, "clip_ratio/region_mean": 0.002020048661506735, "epoch": 0.02715993210016975, "grad_norm": 0.10802154242992401, "learning_rate": 1e-06, "loss": 0.0658, "step": 291 }, { "clip_ratio/high_max": 0.002360517657507444, "clip_ratio/high_mean": 0.0010021632942880387, "clip_ratio/low_mean": 0.001076934360753512, "clip_ratio/low_min": 5.497665006259922e-05, "clip_ratio/region_mean": 0.0020790976341231726, "epoch": 0.027253265200170334, "grad_norm": 0.11743683367967606, "learning_rate": 1e-06, "loss": -0.0019, "step": 292 }, { "clip_ratio/high_max": 0.002440800904878415, "clip_ratio/high_mean": 0.0010539209433773067, "clip_ratio/low_mean": 0.0012650684020627523, "clip_ratio/low_min": 9.336321272712667e-05, "clip_ratio/region_mean": 0.0023189894200186245, "epoch": 0.027346598300170916, "grad_norm": 0.13771085441112518, "learning_rate": 1e-06, "loss": -0.0241, "step": 293 }, { "clip_ratio/high_max": 0.0025593096725060605, "clip_ratio/high_mean": 0.0009615876770112664, "clip_ratio/low_mean": 0.0010978662940033246, "clip_ratio/low_min": 0.0001406737937941216, "clip_ratio/region_mean": 0.002059453967376612, "epoch": 0.0274399314001715, "grad_norm": 0.11490236967802048, "learning_rate": 1e-06, "loss": 0.0049, "step": 294 }, { "clip_ratio/high_max": 0.002203147654654458, "clip_ratio/high_mean": 0.0009295364034187514, "clip_ratio/low_mean": 0.0009648299746913835, "clip_ratio/low_min": 1.770538256096188e-05, "clip_ratio/region_mean": 0.0018943663890240714, "epoch": 0.027533264500172083, "grad_norm": 0.11392612755298615, "learning_rate": 1e-06, "loss": -0.0008, "step": 295 }, { "clip_ratio/high_max": 0.0019143160498060752, "clip_ratio/high_mean": 0.0009036548781296005, "clip_ratio/low_mean": 0.0009823409563978203, "clip_ratio/low_min": 8.575135234423215e-05, "clip_ratio/region_mean": 0.0018859958290704526, "epoch": 0.027626597600172668, "grad_norm": 0.11274272948503494, "learning_rate": 1e-06, "loss": 0.0034, "step": 296 }, { "clip_ratio/high_max": 0.0021235786225588527, "clip_ratio/high_mean": 0.0009286711947424919, "clip_ratio/low_mean": 0.0010827420665009413, "clip_ratio/low_min": 9.054255770024611e-05, "clip_ratio/region_mean": 0.0020114132785238326, "epoch": 0.02771993070017325, "grad_norm": 0.11259263008832932, "learning_rate": 1e-06, "loss": 0.0267, "step": 297 }, { "clip_ratio/high_max": 0.002343071962968679, "clip_ratio/high_mean": 0.0010438790541229537, "clip_ratio/low_mean": 0.001059468137100339, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021033471639384516, "epoch": 0.02781326380017383, "grad_norm": 0.12148614227771759, "learning_rate": 1e-06, "loss": -0.0121, "step": 298 }, { "clip_ratio/high_max": 0.002493363179382868, "clip_ratio/high_mean": 0.0010799922183650779, "clip_ratio/low_mean": 0.0009481222350586904, "clip_ratio/low_min": 2.8298322831687983e-05, "clip_ratio/region_mean": 0.002028114424319938, "epoch": 0.027906596900174416, "grad_norm": 0.11882860213518143, "learning_rate": 1e-06, "loss": -0.0315, "step": 299 }, { "clip_ratio/high_max": 0.0022521089704241604, "clip_ratio/high_mean": 0.0009415843305760063, "clip_ratio/low_mean": 0.0009593866561772302, "clip_ratio/low_min": 0.00013306337132235058, "clip_ratio/region_mean": 0.0019009710376849398, "epoch": 0.027999930000174998, "grad_norm": 0.10964560508728027, "learning_rate": 1e-06, "loss": -0.0162, "step": 300 }, { "clip_ratio/high_max": 0.002038241731497692, "clip_ratio/high_mean": 0.0007765646369080059, "clip_ratio/low_mean": 0.0010706147386372322, "clip_ratio/low_min": 0.00013939641121396562, "clip_ratio/region_mean": 0.0018471794028300792, "epoch": 0.028093263100175583, "grad_norm": 0.10963646322488785, "learning_rate": 1e-06, "loss": 0.0385, "step": 301 }, { "clip_ratio/high_max": 0.002626213747134898, "clip_ratio/high_mean": 0.0009720066900626989, "clip_ratio/low_mean": 0.0010703817824833095, "clip_ratio/low_min": 0.00010004171963373665, "clip_ratio/region_mean": 0.0020423884852789342, "epoch": 0.028186596200176165, "grad_norm": 0.1182062178850174, "learning_rate": 1e-06, "loss": -0.0022, "step": 302 }, { "clip_ratio/high_max": 0.0018748773509287275, "clip_ratio/high_mean": 0.0009008149536384735, "clip_ratio/low_mean": 0.0008921420248952927, "clip_ratio/low_min": 0.00013410473820840707, "clip_ratio/region_mean": 0.001792956973076798, "epoch": 0.02827992930017675, "grad_norm": 0.10940016806125641, "learning_rate": 1e-06, "loss": -0.0035, "step": 303 }, { "clip_ratio/high_max": 0.002016915019339649, "clip_ratio/high_mean": 0.0008629738986201119, "clip_ratio/low_mean": 0.001146636226621922, "clip_ratio/low_min": 0.00010632408884703182, "clip_ratio/region_mean": 0.0020096101143280976, "epoch": 0.028373262400177332, "grad_norm": 0.1155250295996666, "learning_rate": 1e-06, "loss": 0.047, "step": 304 }, { "clip_ratio/high_max": 0.0019444970785116311, "clip_ratio/high_mean": 0.0008174063568731071, "clip_ratio/low_mean": 0.0011137496803712565, "clip_ratio/low_min": 0.00014554495373886311, "clip_ratio/region_mean": 0.001931156039063353, "epoch": 0.028466595500177917, "grad_norm": 14.725848197937012, "learning_rate": 1e-06, "loss": 0.0396, "step": 305 }, { "clip_ratio/high_max": 0.0021021909997216426, "clip_ratio/high_mean": 0.0009064140031114221, "clip_ratio/low_mean": 0.000976428500507609, "clip_ratio/low_min": 1.3067112377029844e-05, "clip_ratio/region_mean": 0.0018828425018000416, "epoch": 0.0285599286001785, "grad_norm": 0.11437109112739563, "learning_rate": 1e-06, "loss": 0.0031, "step": 306 }, { "clip_ratio/high_max": 0.0020876354465144686, "clip_ratio/high_mean": 0.0009094852357520722, "clip_ratio/low_mean": 0.0011950351581617724, "clip_ratio/low_min": 0.00011705498945957515, "clip_ratio/region_mean": 0.002104520426655654, "epoch": 0.028653261700179084, "grad_norm": 0.12165649235248566, "learning_rate": 1e-06, "loss": -0.0095, "step": 307 }, { "clip_ratio/high_max": 0.0020882127719232813, "clip_ratio/high_mean": 0.0009038070656970376, "clip_ratio/low_mean": 0.0011496131082822103, "clip_ratio/low_min": 0.00011077620729338378, "clip_ratio/region_mean": 0.0020534201757982373, "epoch": 0.028746594800179666, "grad_norm": 0.12291403859853745, "learning_rate": 1e-06, "loss": 0.0114, "step": 308 }, { "clip_ratio/high_max": 0.0024690960271982476, "clip_ratio/high_mean": 0.0009290014877478825, "clip_ratio/low_mean": 0.0012561733237816952, "clip_ratio/low_min": 8.222426913562231e-05, "clip_ratio/region_mean": 0.0021851748242625035, "epoch": 0.02883992790018025, "grad_norm": 0.11932417005300522, "learning_rate": 1e-06, "loss": 0.0299, "step": 309 }, { "clip_ratio/high_max": 0.002151043310732348, "clip_ratio/high_mean": 0.0008489551582897548, "clip_ratio/low_mean": 0.0011632040059339488, "clip_ratio/low_min": 2.3563326976727694e-05, "clip_ratio/region_mean": 0.0020121591951465234, "epoch": 0.028933261000180833, "grad_norm": 0.11020629107952118, "learning_rate": 1e-06, "loss": 0.0336, "step": 310 }, { "clip_ratio/high_max": 0.0020902167016174644, "clip_ratio/high_mean": 0.0008407848545175511, "clip_ratio/low_mean": 0.0010362534849264193, "clip_ratio/low_min": 8.118540063151158e-05, "clip_ratio/region_mean": 0.0018770383321680129, "epoch": 0.029026594100181415, "grad_norm": 0.11123541742563248, "learning_rate": 1e-06, "loss": 0.0168, "step": 311 }, { "clip_ratio/high_max": 0.002179583036195254, "clip_ratio/high_mean": 0.0008577987227909034, "clip_ratio/low_mean": 0.0010695968376239762, "clip_ratio/low_min": 4.6227102757256944e-05, "clip_ratio/region_mean": 0.0019273955695098266, "epoch": 0.029119927200182, "grad_norm": 0.11267873644828796, "learning_rate": 1e-06, "loss": 0.0096, "step": 312 }, { "clip_ratio/high_max": 0.002187814459830406, "clip_ratio/high_mean": 0.0008259893897957227, "clip_ratio/low_mean": 0.0011067908199038357, "clip_ratio/low_min": 5.453302583191544e-05, "clip_ratio/region_mean": 0.0019327802147017792, "epoch": 0.02921326030018258, "grad_norm": 0.10996173322200775, "learning_rate": 1e-06, "loss": 0.0188, "step": 313 }, { "clip_ratio/high_max": 0.002290935481141787, "clip_ratio/high_mean": 0.0009460082583245821, "clip_ratio/low_mean": 0.0011237544349569362, "clip_ratio/low_min": 0.00019898545542673673, "clip_ratio/region_mean": 0.0020697627114714123, "epoch": 0.029306593400183167, "grad_norm": 0.11206992715597153, "learning_rate": 1e-06, "loss": 0.0099, "step": 314 }, { "clip_ratio/high_max": 0.0018592779779282864, "clip_ratio/high_mean": 0.0007766994185658405, "clip_ratio/low_mean": 0.00106459978633211, "clip_ratio/low_min": 0.0001234176761499839, "clip_ratio/region_mean": 0.0018412991994409822, "epoch": 0.02939992650018375, "grad_norm": 0.10743377357721329, "learning_rate": 1e-06, "loss": 0.028, "step": 315 }, { "clip_ratio/high_max": 0.00216169571649516, "clip_ratio/high_mean": 0.0009073383462236961, "clip_ratio/low_mean": 0.0009665958641562611, "clip_ratio/low_min": 8.019068354769843e-05, "clip_ratio/region_mean": 0.001873934168543201, "epoch": 0.029493259600184334, "grad_norm": 0.11096478998661041, "learning_rate": 1e-06, "loss": -0.0066, "step": 316 }, { "clip_ratio/high_max": 0.001815407202229835, "clip_ratio/high_mean": 0.0007989068417373346, "clip_ratio/low_mean": 0.0013156889435776975, "clip_ratio/low_min": 0.0001044754662871128, "clip_ratio/region_mean": 0.0021145957653061487, "epoch": 0.029586592700184915, "grad_norm": 0.1608605533838272, "learning_rate": 1e-06, "loss": 0.0549, "step": 317 }, { "clip_ratio/high_max": 0.002236041751530138, "clip_ratio/high_mean": 0.000868625141265511, "clip_ratio/low_mean": 0.0011786719078372698, "clip_ratio/low_min": 6.258014946070034e-05, "clip_ratio/region_mean": 0.0020472970500122756, "epoch": 0.0296799258001855, "grad_norm": 0.11986640095710754, "learning_rate": 1e-06, "loss": 0.0293, "step": 318 }, { "clip_ratio/high_max": 0.0021051574003649876, "clip_ratio/high_mean": 0.0008974846241471823, "clip_ratio/low_mean": 0.0012301670685701538, "clip_ratio/low_min": 8.920971049519721e-05, "clip_ratio/region_mean": 0.002127651678165421, "epoch": 0.029773258900186082, "grad_norm": 0.12771056592464447, "learning_rate": 1e-06, "loss": 0.0813, "step": 319 }, { "clip_ratio/high_max": 0.0021571613433479797, "clip_ratio/high_mean": 0.0008580247031204635, "clip_ratio/low_mean": 0.0010435702915856382, "clip_ratio/low_min": 0.00013387511171458755, "clip_ratio/region_mean": 0.0019015949656022713, "epoch": 0.029866592000186667, "grad_norm": 0.12085594981908798, "learning_rate": 1e-06, "loss": -0.0077, "step": 320 }, { "clip_ratio/high_max": 0.002569294931163313, "clip_ratio/high_mean": 0.0009803638513403712, "clip_ratio/low_mean": 0.001121017903642496, "clip_ratio/low_min": 5.670980135619175e-05, "clip_ratio/region_mean": 0.002101381804095581, "epoch": 0.02995992510018725, "grad_norm": 0.10860329121351242, "learning_rate": 1e-06, "loss": 0.0299, "step": 321 }, { "clip_ratio/high_max": 0.002290455511683831, "clip_ratio/high_mean": 0.0010337076855648775, "clip_ratio/low_mean": 0.001258689138921909, "clip_ratio/low_min": 7.156406809372129e-05, "clip_ratio/region_mean": 0.0022923968645045534, "epoch": 0.030053258200187834, "grad_norm": 0.14300259947776794, "learning_rate": 1e-06, "loss": 0.0233, "step": 322 }, { "clip_ratio/high_max": 0.002157535061996896, "clip_ratio/high_mean": 0.0010063192603411153, "clip_ratio/low_mean": 0.0010595787480269792, "clip_ratio/low_min": 6.554570427397266e-05, "clip_ratio/region_mean": 0.002065898028376978, "epoch": 0.030146591300188416, "grad_norm": 0.12497463822364807, "learning_rate": 1e-06, "loss": 0.0025, "step": 323 }, { "clip_ratio/high_max": 0.002143306970538106, "clip_ratio/high_mean": 0.0009250070907000918, "clip_ratio/low_mean": 0.001001356817141641, "clip_ratio/low_min": 0.0001660984598856885, "clip_ratio/region_mean": 0.001926363882375881, "epoch": 0.030239924400188998, "grad_norm": 0.11590266227722168, "learning_rate": 1e-06, "loss": 0.0046, "step": 324 }, { "clip_ratio/high_max": 0.0023267439974006265, "clip_ratio/high_mean": 0.0009486937760811998, "clip_ratio/low_mean": 0.0010370833133492852, "clip_ratio/low_min": 0.00011028562857973156, "clip_ratio/region_mean": 0.001985777067602612, "epoch": 0.030333257500189583, "grad_norm": 0.11165519058704376, "learning_rate": 1e-06, "loss": 0.0114, "step": 325 }, { "clip_ratio/high_max": 0.0019282541616121307, "clip_ratio/high_mean": 0.0008478929066768615, "clip_ratio/low_mean": 0.0011901712969120126, "clip_ratio/low_min": 0.00015262997476384044, "clip_ratio/region_mean": 0.0020380642235977575, "epoch": 0.030426590600190165, "grad_norm": 0.1079166904091835, "learning_rate": 1e-06, "loss": 0.0341, "step": 326 }, { "clip_ratio/high_max": 0.002220818139903713, "clip_ratio/high_mean": 0.0009001561902550748, "clip_ratio/low_mean": 0.0011336224706610665, "clip_ratio/low_min": 0.00010049212050944334, "clip_ratio/region_mean": 0.002033778640907258, "epoch": 0.03051992370019075, "grad_norm": 0.11433017253875732, "learning_rate": 1e-06, "loss": 0.0484, "step": 327 }, { "clip_ratio/high_max": 0.0021585938629868906, "clip_ratio/high_mean": 0.0008333931491506519, "clip_ratio/low_mean": 0.0013346746491151862, "clip_ratio/low_min": 0.0001245234270754736, "clip_ratio/region_mean": 0.0021680677818949334, "epoch": 0.030613256800191332, "grad_norm": 0.10329735279083252, "learning_rate": 1e-06, "loss": 0.0646, "step": 328 }, { "clip_ratio/high_max": 0.002627769506943878, "clip_ratio/high_mean": 0.0011090613079431932, "clip_ratio/low_mean": 0.0013199484637880232, "clip_ratio/low_min": 9.724599294713698e-06, "clip_ratio/region_mean": 0.0024290097717312165, "epoch": 0.030706589900191917, "grad_norm": 0.22804972529411316, "learning_rate": 1e-06, "loss": 0.0118, "step": 329 }, { "clip_ratio/high_max": 0.0021940586593700573, "clip_ratio/high_mean": 0.0010116602315974887, "clip_ratio/low_mean": 0.0010665427253115922, "clip_ratio/low_min": 7.944414210214745e-05, "clip_ratio/region_mean": 0.0020782028950634412, "epoch": 0.0307999230001925, "grad_norm": 0.10244179517030716, "learning_rate": 1e-06, "loss": 0.0165, "step": 330 }, { "clip_ratio/high_max": 0.002239552872197237, "clip_ratio/high_mean": 0.0009741118919919245, "clip_ratio/low_mean": 0.001268049460122711, "clip_ratio/low_min": 0.00016382148351112846, "clip_ratio/region_mean": 0.002242161353933625, "epoch": 0.030893256100193084, "grad_norm": 0.11611955612897873, "learning_rate": 1e-06, "loss": 0.0332, "step": 331 }, { "clip_ratio/high_max": 0.002064609434455633, "clip_ratio/high_mean": 0.0008662734035169706, "clip_ratio/low_mean": 0.0012566667901410256, "clip_ratio/low_min": 0.0001835051916714292, "clip_ratio/region_mean": 0.002122940124536399, "epoch": 0.030986589200193666, "grad_norm": 0.13188059628009796, "learning_rate": 1e-06, "loss": 0.056, "step": 332 }, { "clip_ratio/high_max": 0.0021246587857604027, "clip_ratio/high_mean": 0.0009811239651753567, "clip_ratio/low_mean": 0.0011925911840080516, "clip_ratio/low_min": 0.0001146252870967146, "clip_ratio/region_mean": 0.0021737151328125037, "epoch": 0.03107992230019425, "grad_norm": 0.12084190547466278, "learning_rate": 1e-06, "loss": 0.0089, "step": 333 }, { "clip_ratio/high_max": 0.0020908947408315726, "clip_ratio/high_mean": 0.000870091213073465, "clip_ratio/low_mean": 0.0011460989444458392, "clip_ratio/low_min": 8.154174929586588e-05, "clip_ratio/region_mean": 0.0020161901411483996, "epoch": 0.031173255400194833, "grad_norm": 0.1228744387626648, "learning_rate": 1e-06, "loss": 0.0692, "step": 334 }, { "clip_ratio/high_max": 0.0021643416112056, "clip_ratio/high_mean": 0.0008296875603264198, "clip_ratio/low_mean": 0.0011237840626563411, "clip_ratio/low_min": 9.355169504488003e-05, "clip_ratio/region_mean": 0.001953471604792867, "epoch": 0.03126658850019542, "grad_norm": 0.11427238583564758, "learning_rate": 1e-06, "loss": 0.0253, "step": 335 }, { "clip_ratio/high_max": 0.0019447591839707457, "clip_ratio/high_mean": 0.0009052424302353757, "clip_ratio/low_mean": 0.0010355626727687195, "clip_ratio/low_min": 9.105355456995312e-05, "clip_ratio/region_mean": 0.0019408051084610634, "epoch": 0.031359921600196, "grad_norm": 0.11841615289449692, "learning_rate": 1e-06, "loss": 0.0131, "step": 336 }, { "clip_ratio/high_max": 0.00232812426838791, "clip_ratio/high_mean": 0.0009957498878065962, "clip_ratio/low_mean": 0.0010864943724300247, "clip_ratio/low_min": 0.00013056925308774225, "clip_ratio/region_mean": 0.002082244238408748, "epoch": 0.03145325470019658, "grad_norm": 0.12674465775489807, "learning_rate": 1e-06, "loss": 0.0027, "step": 337 }, { "clip_ratio/high_max": 0.0025400919912499376, "clip_ratio/high_mean": 0.0010203268793702591, "clip_ratio/low_mean": 0.0010479550910531543, "clip_ratio/low_min": 7.761822325846879e-05, "clip_ratio/region_mean": 0.002068281937681604, "epoch": 0.031546587800197166, "grad_norm": 0.10925080627202988, "learning_rate": 1e-06, "loss": 0.007, "step": 338 }, { "clip_ratio/high_max": 0.002594425001007039, "clip_ratio/high_mean": 0.0011677702459564898, "clip_ratio/low_mean": 0.0009922743774950504, "clip_ratio/low_min": 1.7801196008804254e-05, "clip_ratio/region_mean": 0.0021600445470539853, "epoch": 0.03163992090019775, "grad_norm": 0.4444403052330017, "learning_rate": 1e-06, "loss": -0.0159, "step": 339 }, { "clip_ratio/high_max": 0.0018279580544913188, "clip_ratio/high_mean": 0.0007993062654350069, "clip_ratio/low_mean": 0.0011365246537025087, "clip_ratio/low_min": 5.94459115745849e-05, "clip_ratio/region_mean": 0.0019358308854862116, "epoch": 0.03173325400019833, "grad_norm": 0.10690231621265411, "learning_rate": 1e-06, "loss": 0.0825, "step": 340 }, { "clip_ratio/high_max": 0.002140695243724622, "clip_ratio/high_mean": 0.0009172743375529535, "clip_ratio/low_mean": 0.0010918516491074115, "clip_ratio/low_min": 9.728156146593392e-05, "clip_ratio/region_mean": 0.0020091259430046193, "epoch": 0.031826587100198915, "grad_norm": 0.11435524374246597, "learning_rate": 1e-06, "loss": 0.0374, "step": 341 }, { "clip_ratio/high_max": 0.0020586257523973472, "clip_ratio/high_mean": 0.000902438179764431, "clip_ratio/low_mean": 0.001106838470150251, "clip_ratio/low_min": 4.30491054430604e-05, "clip_ratio/region_mean": 0.002009276649914682, "epoch": 0.0319199202001995, "grad_norm": 0.13048726320266724, "learning_rate": 1e-06, "loss": 0.0465, "step": 342 }, { "clip_ratio/high_max": 0.002136928407708183, "clip_ratio/high_mean": 0.000896078025107272, "clip_ratio/low_mean": 0.0011898585507879034, "clip_ratio/low_min": 0.00011722445742634591, "clip_ratio/region_mean": 0.002085936546791345, "epoch": 0.032013253300200085, "grad_norm": 0.1033177301287651, "learning_rate": 1e-06, "loss": 0.0701, "step": 343 }, { "clip_ratio/high_max": 0.0022084057782194577, "clip_ratio/high_mean": 0.0008568549201299902, "clip_ratio/low_mean": 0.0012664664645853918, "clip_ratio/low_min": 0.0001900614661280997, "clip_ratio/region_mean": 0.002123321406543255, "epoch": 0.032106586400200664, "grad_norm": 0.10965591669082642, "learning_rate": 1e-06, "loss": 0.081, "step": 344 }, { "clip_ratio/high_max": 0.0021995499773765914, "clip_ratio/high_mean": 0.0009204053349094465, "clip_ratio/low_mean": 0.0010610223434923682, "clip_ratio/low_min": 8.669406906847144e-05, "clip_ratio/region_mean": 0.0019814276456600055, "epoch": 0.03219991950020125, "grad_norm": 0.13363026082515717, "learning_rate": 1e-06, "loss": 0.0108, "step": 345 }, { "clip_ratio/high_max": 0.0025101989158429205, "clip_ratio/high_mean": 0.0010780927477753721, "clip_ratio/low_mean": 0.001075920521543594, "clip_ratio/low_min": 6.821509487053845e-05, "clip_ratio/region_mean": 0.0021540133166126907, "epoch": 0.032293252600201834, "grad_norm": 0.11479067802429199, "learning_rate": 1e-06, "loss": 0.0085, "step": 346 }, { "clip_ratio/high_max": 0.0021954900221317075, "clip_ratio/high_mean": 0.0009020977868203772, "clip_ratio/low_mean": 0.0013978856331959832, "clip_ratio/low_min": 0.00016852553017088212, "clip_ratio/region_mean": 0.0022999833890935406, "epoch": 0.03238658570020242, "grad_norm": 0.14521950483322144, "learning_rate": 1e-06, "loss": 0.0584, "step": 347 }, { "clip_ratio/high_max": 0.002100695488479687, "clip_ratio/high_mean": 0.0008622984587418614, "clip_ratio/low_mean": 0.001215741373016499, "clip_ratio/low_min": 0.00012669900752371177, "clip_ratio/region_mean": 0.0020780398626811802, "epoch": 0.032479918800203, "grad_norm": 0.10538921505212784, "learning_rate": 1e-06, "loss": 0.0125, "step": 348 }, { "clip_ratio/high_max": 0.0021563248701568227, "clip_ratio/high_mean": 0.0009867449170997133, "clip_ratio/low_mean": 0.0011401438314351253, "clip_ratio/low_min": 8.875741150404792e-05, "clip_ratio/region_mean": 0.0021268887794576585, "epoch": 0.03257325190020358, "grad_norm": 0.11249448359012604, "learning_rate": 1e-06, "loss": 0.0135, "step": 349 }, { "clip_ratio/high_max": 0.002390588961134199, "clip_ratio/high_mean": 0.0009480214575887658, "clip_ratio/low_mean": 0.001145987107520341, "clip_ratio/low_min": 6.574898907274473e-05, "clip_ratio/region_mean": 0.002094008566928096, "epoch": 0.03266658500020417, "grad_norm": 0.11310872435569763, "learning_rate": 1e-06, "loss": 0.0151, "step": 350 }, { "clip_ratio/high_max": 0.001988746036658995, "clip_ratio/high_mean": 0.0008116280841932166, "clip_ratio/low_mean": 0.001365374027955113, "clip_ratio/low_min": 0.00024016555744310608, "clip_ratio/region_mean": 0.002177002126700245, "epoch": 0.032759918100204746, "grad_norm": 0.11652226746082306, "learning_rate": 1e-06, "loss": 0.087, "step": 351 }, { "clip_ratio/high_max": 0.0020331219893705565, "clip_ratio/high_mean": 0.0009234167628164869, "clip_ratio/low_mean": 0.0012328605589573272, "clip_ratio/low_min": 0.00022346949299389962, "clip_ratio/region_mean": 0.0021562772890320048, "epoch": 0.03285325120020533, "grad_norm": 0.11358150839805603, "learning_rate": 1e-06, "loss": 0.0143, "step": 352 }, { "clip_ratio/high_max": 0.0021666164975613356, "clip_ratio/high_mean": 0.0008399095604545437, "clip_ratio/low_mean": 0.0012059905056958087, "clip_ratio/low_min": 0.00014595930952054914, "clip_ratio/region_mean": 0.002045900087978225, "epoch": 0.03294658430020592, "grad_norm": 0.10419805347919464, "learning_rate": 1e-06, "loss": 0.045, "step": 353 }, { "clip_ratio/high_max": 0.0020624875105568208, "clip_ratio/high_mean": 0.0008539489972463343, "clip_ratio/low_mean": 0.00117095968380454, "clip_ratio/low_min": 5.2058303481317125e-05, "clip_ratio/region_mean": 0.0020249086810508743, "epoch": 0.0330399174002065, "grad_norm": 3558.2763671875, "learning_rate": 1e-06, "loss": 0.3708, "step": 354 }, { "clip_ratio/high_max": 0.002189752980484627, "clip_ratio/high_mean": 0.0008734756684134481, "clip_ratio/low_mean": 0.0010751479403552366, "clip_ratio/low_min": 0.00011319543955323752, "clip_ratio/region_mean": 0.0019486236124066636, "epoch": 0.03313325050020708, "grad_norm": 0.11278904229402542, "learning_rate": 1e-06, "loss": 0.0342, "step": 355 }, { "clip_ratio/high_max": 0.002278018748256727, "clip_ratio/high_mean": 0.0008883386590241571, "clip_ratio/low_mean": 0.0010244712721032556, "clip_ratio/low_min": 5.442007568490226e-05, "clip_ratio/region_mean": 0.001912809930217918, "epoch": 0.033226583600207665, "grad_norm": 0.1159723773598671, "learning_rate": 1e-06, "loss": -0.0039, "step": 356 }, { "clip_ratio/high_max": 0.0023154462687671185, "clip_ratio/high_mean": 0.0009308602202509064, "clip_ratio/low_mean": 0.00125598987142439, "clip_ratio/low_min": 9.143451734416885e-05, "clip_ratio/region_mean": 0.002186850157158915, "epoch": 0.03331991670020825, "grad_norm": 0.1177138015627861, "learning_rate": 1e-06, "loss": 0.0292, "step": 357 }, { "clip_ratio/high_max": 0.00206620594690321, "clip_ratio/high_mean": 0.0008164736009348417, "clip_ratio/low_mean": 0.0011954403889831156, "clip_ratio/low_min": 0.00011787036692112451, "clip_ratio/region_mean": 0.0020119140099268407, "epoch": 0.033413249800208836, "grad_norm": 0.10872387140989304, "learning_rate": 1e-06, "loss": 0.0838, "step": 358 }, { "clip_ratio/high_max": 0.002361755156016443, "clip_ratio/high_mean": 0.0009098943064600462, "clip_ratio/low_mean": 0.0011377494683983969, "clip_ratio/low_min": 0.00013214747650636127, "clip_ratio/region_mean": 0.0020476438076002523, "epoch": 0.033506582900209414, "grad_norm": 0.11202988028526306, "learning_rate": 1e-06, "loss": 0.0681, "step": 359 }, { "clip_ratio/high_max": 0.0019100447389064357, "clip_ratio/high_mean": 0.0008295353891298873, "clip_ratio/low_mean": 0.0010534365555940894, "clip_ratio/low_min": 0.0001322676289419178, "clip_ratio/region_mean": 0.001882971941085998, "epoch": 0.03359991600021, "grad_norm": 0.11695244908332825, "learning_rate": 1e-06, "loss": 0.0462, "step": 360 }, { "clip_ratio/high_max": 0.002434186142636463, "clip_ratio/high_mean": 0.0010246202728012577, "clip_ratio/low_mean": 0.001124836260714801, "clip_ratio/low_min": 8.7745472228562e-05, "clip_ratio/region_mean": 0.002149456471670419, "epoch": 0.033693249100210584, "grad_norm": 0.12341535836458206, "learning_rate": 1e-06, "loss": 0.0308, "step": 361 }, { "clip_ratio/high_max": 0.002108447100908961, "clip_ratio/high_mean": 0.0009159620240097865, "clip_ratio/low_mean": 0.0011480096836748999, "clip_ratio/low_min": 2.7470548047858756e-05, "clip_ratio/region_mean": 0.002063971711322665, "epoch": 0.03378658220021117, "grad_norm": 77.47904968261719, "learning_rate": 1e-06, "loss": 0.0425, "step": 362 }, { "clip_ratio/high_max": 0.0019328292546560988, "clip_ratio/high_mean": 0.0008381344696317683, "clip_ratio/low_mean": 0.0011714386382664088, "clip_ratio/low_min": 7.131160236895084e-05, "clip_ratio/region_mean": 0.0020095731160836294, "epoch": 0.03387991530021175, "grad_norm": 0.1070476546883583, "learning_rate": 1e-06, "loss": 0.0396, "step": 363 }, { "clip_ratio/high_max": 0.0025864454873953946, "clip_ratio/high_mean": 0.0010019448891398497, "clip_ratio/low_mean": 0.0011095358204329386, "clip_ratio/low_min": 0.00011711732349795057, "clip_ratio/region_mean": 0.0021114807241247036, "epoch": 0.03397324840021233, "grad_norm": 0.10823500156402588, "learning_rate": 1e-06, "loss": 0.0079, "step": 364 }, { "clip_ratio/high_max": 0.0024609017054899596, "clip_ratio/high_mean": 0.0009992888808483258, "clip_ratio/low_mean": 0.001211238373798551, "clip_ratio/low_min": 0.00016066131684056018, "clip_ratio/region_mean": 0.002210527192801237, "epoch": 0.03406658150021292, "grad_norm": 0.10757739096879959, "learning_rate": 1e-06, "loss": 0.0476, "step": 365 }, { "clip_ratio/high_max": 0.0019327660484123044, "clip_ratio/high_mean": 0.0008466597228107275, "clip_ratio/low_mean": 0.0012629792581719812, "clip_ratio/low_min": 0.00019227057327952934, "clip_ratio/region_mean": 0.0021096389536978677, "epoch": 0.034159914600213497, "grad_norm": 0.10895657539367676, "learning_rate": 1e-06, "loss": 0.0477, "step": 366 }, { "clip_ratio/high_max": 0.0025174269394483417, "clip_ratio/high_mean": 0.0009726378939376445, "clip_ratio/low_mean": 0.001160546064056689, "clip_ratio/low_min": 7.140652814996429e-05, "clip_ratio/region_mean": 0.0021331839452614076, "epoch": 0.03425324770021408, "grad_norm": 0.11871457099914551, "learning_rate": 1e-06, "loss": 0.0087, "step": 367 }, { "clip_ratio/high_max": 0.002498274807294365, "clip_ratio/high_mean": 0.0009608788041077787, "clip_ratio/low_mean": 0.0017753513166098855, "clip_ratio/low_min": 7.549638758064248e-05, "clip_ratio/region_mean": 0.0027362301480025053, "epoch": 0.03434658080021467, "grad_norm": 0.14470025897026062, "learning_rate": 1e-06, "loss": 0.0368, "step": 368 }, { "clip_ratio/high_max": 0.0018706831469899043, "clip_ratio/high_mean": 0.0008562452712794766, "clip_ratio/low_mean": 0.0011088792816735804, "clip_ratio/low_min": 8.010771489352919e-05, "clip_ratio/region_mean": 0.0019651246329885907, "epoch": 0.03443991390021525, "grad_norm": 142.6408233642578, "learning_rate": 1e-06, "loss": 0.0493, "step": 369 }, { "clip_ratio/high_max": 0.0023074456985341385, "clip_ratio/high_mean": 0.001000052234303439, "clip_ratio/low_mean": 0.0010730547783168731, "clip_ratio/low_min": 7.081450348778162e-05, "clip_ratio/region_mean": 0.002073107098112814, "epoch": 0.03453324700021583, "grad_norm": 0.3243756890296936, "learning_rate": 1e-06, "loss": -0.0217, "step": 370 }, { "clip_ratio/high_max": 0.0022518236510222778, "clip_ratio/high_mean": 0.0009693707743281266, "clip_ratio/low_mean": 0.001249856944923522, "clip_ratio/low_min": 0.0001641616627239273, "clip_ratio/region_mean": 0.002219227739260532, "epoch": 0.034626580100216416, "grad_norm": 0.1239929273724556, "learning_rate": 1e-06, "loss": 0.0465, "step": 371 }, { "clip_ratio/high_max": 0.0026092430343851447, "clip_ratio/high_mean": 0.001003756478894502, "clip_ratio/low_mean": 0.001093214936190634, "clip_ratio/low_min": 0.00012626219540834427, "clip_ratio/region_mean": 0.00209697140235221, "epoch": 0.034719913200217, "grad_norm": 0.11750996112823486, "learning_rate": 1e-06, "loss": -0.0068, "step": 372 }, { "clip_ratio/high_max": 0.002271275734528899, "clip_ratio/high_mean": 0.0010254267763230018, "clip_ratio/low_mean": 0.0014832128399575595, "clip_ratio/low_min": 8.646424794278573e-05, "clip_ratio/region_mean": 0.0025086395326070487, "epoch": 0.034813246300217586, "grad_norm": 0.1160498559474945, "learning_rate": 1e-06, "loss": 0.031, "step": 373 }, { "clip_ratio/high_max": 0.0022865480605105404, "clip_ratio/high_mean": 0.0009589161909389077, "clip_ratio/low_mean": 0.001251539524673717, "clip_ratio/low_min": 0.00012960705316800158, "clip_ratio/region_mean": 0.00221045569924172, "epoch": 0.034906579400218164, "grad_norm": 0.12079624831676483, "learning_rate": 1e-06, "loss": 0.0627, "step": 374 }, { "clip_ratio/high_max": 0.0025596896302886307, "clip_ratio/high_mean": 0.0009454310347791761, "clip_ratio/low_mean": 0.0011658837320283055, "clip_ratio/low_min": 0.00013075372226012405, "clip_ratio/region_mean": 0.002111314817739185, "epoch": 0.03499991250021875, "grad_norm": 0.11875633150339127, "learning_rate": 1e-06, "loss": 0.0272, "step": 375 }, { "clip_ratio/high_max": 0.0025779101488296874, "clip_ratio/high_mean": 0.0010267890356772114, "clip_ratio/low_mean": 0.0011818734201369807, "clip_ratio/low_min": 0.00012896342195745092, "clip_ratio/region_mean": 0.002208662474004086, "epoch": 0.035093245600219335, "grad_norm": 2.0617852210998535, "learning_rate": 1e-06, "loss": 0.0342, "step": 376 }, { "clip_ratio/high_max": 0.0023847867487347685, "clip_ratio/high_mean": 0.0009529884318908444, "clip_ratio/low_mean": 0.0012494894035626203, "clip_ratio/low_min": 6.29909309282084e-05, "clip_ratio/region_mean": 0.002202477800892666, "epoch": 0.03518657870021991, "grad_norm": 0.11993952095508575, "learning_rate": 1e-06, "loss": 0.0515, "step": 377 }, { "clip_ratio/high_max": 0.0021552698672167026, "clip_ratio/high_mean": 0.000986358878435567, "clip_ratio/low_mean": 0.0010060342774522724, "clip_ratio/low_min": 3.663903316919459e-05, "clip_ratio/region_mean": 0.0019923931831726804, "epoch": 0.0352799118002205, "grad_norm": 0.11223357170820236, "learning_rate": 1e-06, "loss": 0.0166, "step": 378 }, { "clip_ratio/high_max": 0.002161166830774164, "clip_ratio/high_mean": 0.001033863580232719, "clip_ratio/low_mean": 0.0011994639498880133, "clip_ratio/low_min": 8.22612000774825e-05, "clip_ratio/region_mean": 0.002233327519206796, "epoch": 0.03537324490022108, "grad_norm": 0.12360091507434845, "learning_rate": 1e-06, "loss": 0.0106, "step": 379 }, { "clip_ratio/high_max": 0.002008361647312995, "clip_ratio/high_mean": 0.0008517967498846701, "clip_ratio/low_mean": 0.0011015180789399892, "clip_ratio/low_min": 8.637665087007917e-05, "clip_ratio/region_mean": 0.001953314807906281, "epoch": 0.03546657800022167, "grad_norm": 0.11588394641876221, "learning_rate": 1e-06, "loss": 0.0098, "step": 380 }, { "clip_ratio/high_max": 0.0025197108698193915, "clip_ratio/high_mean": 0.0010250580944557441, "clip_ratio/low_mean": 0.0011763858419726603, "clip_ratio/low_min": 5.473867895489093e-05, "clip_ratio/region_mean": 0.0022014439455233514, "epoch": 0.03555991110022225, "grad_norm": 0.1170274168252945, "learning_rate": 1e-06, "loss": 0.0051, "step": 381 }, { "clip_ratio/high_max": 0.0024813237905618735, "clip_ratio/high_mean": 0.0010466479743627133, "clip_ratio/low_mean": 0.0013100554278935306, "clip_ratio/low_min": 0.00032503129477845505, "clip_ratio/region_mean": 0.0023567034004372545, "epoch": 0.03565324420022283, "grad_norm": 0.12184745818376541, "learning_rate": 1e-06, "loss": 0.014, "step": 382 }, { "clip_ratio/high_max": 0.002297039158293046, "clip_ratio/high_mean": 0.0010228568207821809, "clip_ratio/low_mean": 0.0012469372504710918, "clip_ratio/low_min": 0.00012439452621038072, "clip_ratio/region_mean": 0.002269794036692474, "epoch": 0.03574657730022342, "grad_norm": 0.1195698007941246, "learning_rate": 1e-06, "loss": -0.0077, "step": 383 }, { "clip_ratio/high_max": 0.0020837840129388496, "clip_ratio/high_mean": 0.0008683273645146983, "clip_ratio/low_mean": 0.0012905644834972918, "clip_ratio/low_min": 0.0002028144008363597, "clip_ratio/region_mean": 0.0021588918898487464, "epoch": 0.035839910400224, "grad_norm": 0.1446634829044342, "learning_rate": 1e-06, "loss": -0.0095, "step": 384 }, { "clip_ratio/high_max": 0.002249407330964459, "clip_ratio/high_mean": 0.0009522568707325263, "clip_ratio/low_mean": 0.0009188744315906661, "clip_ratio/low_min": 8.93153783181333e-05, "clip_ratio/region_mean": 0.0018711313896346837, "completions/clipped_ratio": 0.013619559151785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 613.9982299804688, "completions/mean_terminated_length": 565.9200439453125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.03593324350022458, "grad_norm": 25.646211624145508, "learning_rate": 1e-06, "loss": 0.1073, "num_tokens": 325348833.0, "reward": 0.582711398601532, "reward_std": 0.19923368096351624, "rewards/simpleverify_reward/mean": 0.5827113389968872, "rewards/simpleverify_reward/std": 0.49311351776123047, "step": 385 }, { "clip_ratio/high_max": 0.002067670058750082, "clip_ratio/high_mean": 0.0008996190608741017, "clip_ratio/low_mean": 0.0010475958952156361, "clip_ratio/low_min": 4.895178244623821e-05, "clip_ratio/region_mean": 0.0019472149760986213, "epoch": 0.036026576600225166, "grad_norm": 17.868595123291016, "learning_rate": 1e-06, "loss": 0.2024, "step": 386 }, { "clip_ratio/high_max": 0.0026831048671738245, "clip_ratio/high_mean": 0.0010850908856809838, "clip_ratio/low_mean": 0.0010147848843189422, "clip_ratio/low_min": 0.00010342623772885418, "clip_ratio/region_mean": 0.002099875775456894, "epoch": 0.03611990970022575, "grad_norm": 0.12918300926685333, "learning_rate": 1e-06, "loss": 0.057, "step": 387 }, { "clip_ratio/high_max": 0.002469547434884589, "clip_ratio/high_mean": 0.0009313369773735758, "clip_ratio/low_mean": 0.0009581187514413614, "clip_ratio/low_min": 8.98518137546489e-05, "clip_ratio/region_mean": 0.001889455692435149, "epoch": 0.03621324280022633, "grad_norm": 111.1423110961914, "learning_rate": 1e-06, "loss": 0.1181, "step": 388 }, { "clip_ratio/high_max": 0.0020690788951469585, "clip_ratio/high_mean": 0.0008665561781526776, "clip_ratio/low_mean": 0.0010723034756665584, "clip_ratio/low_min": 0.00012222520399518544, "clip_ratio/region_mean": 0.0019388596701901406, "epoch": 0.036306575900226914, "grad_norm": 182.8689422607422, "learning_rate": 1e-06, "loss": 0.0617, "step": 389 }, { "clip_ratio/high_max": 0.0025762005243450403, "clip_ratio/high_mean": 0.0009452105859963922, "clip_ratio/low_mean": 0.001112248273784644, "clip_ratio/low_min": 0.00011019068097084528, "clip_ratio/region_mean": 0.0020574588197632693, "epoch": 0.0363999090002275, "grad_norm": 260.7861633300781, "learning_rate": 1e-06, "loss": 0.2447, "step": 390 }, { "clip_ratio/high_max": 0.0028776615363312885, "clip_ratio/high_mean": 0.0011305768821330275, "clip_ratio/low_mean": 0.0009817893151193857, "clip_ratio/low_min": 0.000157995007612044, "clip_ratio/region_mean": 0.0021123662445461378, "epoch": 0.036493242100228085, "grad_norm": 0.15227451920509338, "learning_rate": 1e-06, "loss": 0.0009, "step": 391 }, { "clip_ratio/high_max": 0.0023742825142107904, "clip_ratio/high_mean": 0.001022162312438013, "clip_ratio/low_mean": 0.0010293040395481512, "clip_ratio/low_min": 2.9523555895138998e-05, "clip_ratio/region_mean": 0.0020514663992798887, "epoch": 0.03658657520022866, "grad_norm": 0.12371998280286789, "learning_rate": 1e-06, "loss": 0.0188, "step": 392 }, { "clip_ratio/high_max": 0.0024497416816302575, "clip_ratio/high_mean": 0.0009842597391980235, "clip_ratio/low_mean": 0.001050512641086243, "clip_ratio/low_min": 0.00016549810061405879, "clip_ratio/region_mean": 0.002034772311162669, "epoch": 0.03667990830022925, "grad_norm": 6616.79150390625, "learning_rate": 1e-06, "loss": 1.4815, "step": 393 }, { "clip_ratio/high_max": 0.002531377787818201, "clip_ratio/high_mean": 0.0010408186390122864, "clip_ratio/low_mean": 0.0009986791792471195, "clip_ratio/low_min": 1.6771769878687337e-05, "clip_ratio/region_mean": 0.002039497790974565, "epoch": 0.036773241400229834, "grad_norm": 0.12149332463741302, "learning_rate": 1e-06, "loss": 0.0209, "step": 394 }, { "clip_ratio/high_max": 0.0023246789205586538, "clip_ratio/high_mean": 0.0009743188820721116, "clip_ratio/low_mean": 0.001059644178894814, "clip_ratio/low_min": 4.478173559618881e-05, "clip_ratio/region_mean": 0.0020339630791568197, "epoch": 0.03686657450023042, "grad_norm": 0.12550579011440277, "learning_rate": 1e-06, "loss": 0.0595, "step": 395 }, { "clip_ratio/high_max": 0.002719729862292297, "clip_ratio/high_mean": 0.0010334120634070132, "clip_ratio/low_mean": 0.0010996741038979962, "clip_ratio/low_min": 8.656009595142677e-05, "clip_ratio/region_mean": 0.002133086192770861, "epoch": 0.036959907600231, "grad_norm": 0.10799600183963776, "learning_rate": 1e-06, "loss": -0.0098, "step": 396 }, { "clip_ratio/high_max": 0.0024350728635909036, "clip_ratio/high_mean": 0.0010369525007263292, "clip_ratio/low_mean": 0.0012364416979835369, "clip_ratio/low_min": 0.00014030232341610827, "clip_ratio/region_mean": 0.0022733941950718872, "epoch": 0.03705324070023158, "grad_norm": 0.2517393231391907, "learning_rate": 1e-06, "loss": 0.0213, "step": 397 }, { "clip_ratio/high_max": 0.0022064178774598986, "clip_ratio/high_mean": 0.0009866075888567138, "clip_ratio/low_mean": 0.0011152268034493318, "clip_ratio/low_min": 0.0001264476595679298, "clip_ratio/region_mean": 0.0021018343686591834, "epoch": 0.03714657380023217, "grad_norm": 0.42689386010169983, "learning_rate": 1e-06, "loss": 0.0223, "step": 398 }, { "clip_ratio/high_max": 0.0027222053104196675, "clip_ratio/high_mean": 0.0010609816854412202, "clip_ratio/low_mean": 0.0011185719304194208, "clip_ratio/low_min": 0.0001686264713498531, "clip_ratio/region_mean": 0.0021795535903947894, "epoch": 0.03723990690023275, "grad_norm": 12.289047241210938, "learning_rate": 1e-06, "loss": 0.0404, "step": 399 }, { "clip_ratio/high_max": 0.002141905002645217, "clip_ratio/high_mean": 0.0008924581707105972, "clip_ratio/low_mean": 0.0010605488569126464, "clip_ratio/low_min": 0.00016607354882580694, "clip_ratio/region_mean": 0.001953007078554947, "epoch": 0.03733324000023333, "grad_norm": 0.12708602845668793, "learning_rate": 1e-06, "loss": 0.0151, "step": 400 }, { "clip_ratio/high_max": 0.0026312369518564083, "clip_ratio/high_mean": 0.0009713691142678726, "clip_ratio/low_mean": 0.001152117320089019, "clip_ratio/low_min": 3.873166861012578e-05, "clip_ratio/region_mean": 0.0021234863670542836, "epoch": 0.037426573100233916, "grad_norm": 0.1036255732178688, "learning_rate": 1e-06, "loss": 0.0391, "step": 401 }, { "clip_ratio/high_max": 0.002453912478813436, "clip_ratio/high_mean": 0.001055428172549, "clip_ratio/low_mean": 0.001001802596874768, "clip_ratio/low_min": 4.2193163608317263e-05, "clip_ratio/region_mean": 0.0020572307621478103, "epoch": 0.0375199062002345, "grad_norm": 0.1153542622923851, "learning_rate": 1e-06, "loss": 0.0146, "step": 402 }, { "clip_ratio/high_max": 0.002451273521728581, "clip_ratio/high_mean": 0.0010216802638751687, "clip_ratio/low_mean": 0.0010770295539259678, "clip_ratio/low_min": 0.00012786481875082245, "clip_ratio/region_mean": 0.002098709846904967, "epoch": 0.03761323930023508, "grad_norm": 79.2166976928711, "learning_rate": 1e-06, "loss": 0.022, "step": 403 }, { "clip_ratio/high_max": 0.002031783093116246, "clip_ratio/high_mean": 0.0008570750114813563, "clip_ratio/low_mean": 0.0010908130097959656, "clip_ratio/low_min": 3.844057209789753e-05, "clip_ratio/region_mean": 0.0019478880349197425, "epoch": 0.037706572400235665, "grad_norm": 0.11656417697668076, "learning_rate": 1e-06, "loss": 0.0292, "step": 404 }, { "clip_ratio/high_max": 0.002362692335736938, "clip_ratio/high_mean": 0.0009676938370830612, "clip_ratio/low_mean": 0.001088626577256946, "clip_ratio/low_min": 0.00012091054259144585, "clip_ratio/region_mean": 0.0020563204670907, "epoch": 0.03779990550023625, "grad_norm": 0.11351550370454788, "learning_rate": 1e-06, "loss": 0.0703, "step": 405 }, { "clip_ratio/high_max": 0.0021893087177886628, "clip_ratio/high_mean": 0.0010168567332584644, "clip_ratio/low_mean": 0.0011922485864488408, "clip_ratio/low_min": 0.00015677638839406427, "clip_ratio/region_mean": 0.002209105332440231, "epoch": 0.037893238600236835, "grad_norm": 0.2789289653301239, "learning_rate": 1e-06, "loss": 0.0212, "step": 406 }, { "clip_ratio/high_max": 0.002066814085992519, "clip_ratio/high_mean": 0.0008650986110296799, "clip_ratio/low_mean": 0.0011928926141990814, "clip_ratio/low_min": 0.00016279277224384714, "clip_ratio/region_mean": 0.002057991237961687, "epoch": 0.03798657170023741, "grad_norm": 0.12234649807214737, "learning_rate": 1e-06, "loss": 0.0685, "step": 407 }, { "clip_ratio/high_max": 0.0023458638970623724, "clip_ratio/high_mean": 0.0009759523345564958, "clip_ratio/low_mean": 0.0010563223568169633, "clip_ratio/low_min": 0.00013646559455082752, "clip_ratio/region_mean": 0.002032274664088618, "epoch": 0.038079904800238, "grad_norm": 0.11095178872346878, "learning_rate": 1e-06, "loss": 0.0635, "step": 408 }, { "clip_ratio/high_max": 0.0021028829578426667, "clip_ratio/high_mean": 0.000901340401469497, "clip_ratio/low_mean": 0.0010760918958112597, "clip_ratio/low_min": 9.700328337203246e-05, "clip_ratio/region_mean": 0.001977432330022566, "epoch": 0.038173237900238584, "grad_norm": 0.11165999621152878, "learning_rate": 1e-06, "loss": 0.0247, "step": 409 }, { "clip_ratio/high_max": 0.002505515411030501, "clip_ratio/high_mean": 0.0009516960417386144, "clip_ratio/low_mean": 0.0010113672615261748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019630632814369164, "epoch": 0.03826657100023917, "grad_norm": 0.11181530356407166, "learning_rate": 1e-06, "loss": 0.0363, "step": 410 }, { "clip_ratio/high_max": 0.0025688495879876427, "clip_ratio/high_mean": 0.0011073986534029245, "clip_ratio/low_mean": 0.0011780047789216042, "clip_ratio/low_min": 0.0001348697187495418, "clip_ratio/region_mean": 0.002285403446876444, "epoch": 0.03835990410023975, "grad_norm": 0.7511307001113892, "learning_rate": 1e-06, "loss": 0.0363, "step": 411 }, { "clip_ratio/high_max": 0.002582698012702167, "clip_ratio/high_mean": 0.001013639663142385, "clip_ratio/low_mean": 0.0011140792230435181, "clip_ratio/low_min": 0.00013854836925020209, "clip_ratio/region_mean": 0.0021277188934618607, "epoch": 0.03845323720024033, "grad_norm": 1.0676027536392212, "learning_rate": 1e-06, "loss": 0.0184, "step": 412 }, { "clip_ratio/high_max": 0.0022497668833239004, "clip_ratio/high_mean": 0.0009708232973935083, "clip_ratio/low_mean": 0.001193323107145261, "clip_ratio/low_min": 6.63809241814306e-05, "clip_ratio/region_mean": 0.0021641464190906845, "epoch": 0.03854657030024092, "grad_norm": 0.48996543884277344, "learning_rate": 1e-06, "loss": 0.0171, "step": 413 }, { "clip_ratio/high_max": 0.0024030716813285835, "clip_ratio/high_mean": 0.0009532924068480497, "clip_ratio/low_mean": 0.001171562365925638, "clip_ratio/low_min": 0.00012433270967449062, "clip_ratio/region_mean": 0.0021248547855066136, "epoch": 0.038639903400241496, "grad_norm": 0.11026362329721451, "learning_rate": 1e-06, "loss": 0.0345, "step": 414 }, { "clip_ratio/high_max": 0.002473651125910692, "clip_ratio/high_mean": 0.0008779411837167572, "clip_ratio/low_mean": 0.0010945960038952762, "clip_ratio/low_min": 4.567735413729679e-05, "clip_ratio/region_mean": 0.0019725371967069805, "epoch": 0.03873323650024208, "grad_norm": 0.11202587187290192, "learning_rate": 1e-06, "loss": -0.0133, "step": 415 }, { "clip_ratio/high_max": 0.0022820236190455034, "clip_ratio/high_mean": 0.0009653055367380148, "clip_ratio/low_mean": 0.0010611517136567272, "clip_ratio/low_min": 8.180870827345643e-05, "clip_ratio/region_mean": 0.002026457237661816, "epoch": 0.038826569600242666, "grad_norm": 0.11086110770702362, "learning_rate": 1e-06, "loss": -0.017, "step": 416 }, { "clip_ratio/high_max": 0.0026077920156239998, "clip_ratio/high_mean": 0.0010575046890153317, "clip_ratio/low_mean": 0.0013064665217825677, "clip_ratio/low_min": 0.0001920913127833046, "clip_ratio/region_mean": 0.0023639711944269948, "epoch": 0.03891990270024325, "grad_norm": 0.11942239850759506, "learning_rate": 1e-06, "loss": 0.0507, "step": 417 }, { "clip_ratio/high_max": 0.0022294865048024803, "clip_ratio/high_mean": 0.0009274838212149916, "clip_ratio/low_mean": 0.001136209859396331, "clip_ratio/low_min": 0.00010910385390161537, "clip_ratio/region_mean": 0.0020636937333620153, "epoch": 0.03901323580024383, "grad_norm": 0.11716679483652115, "learning_rate": 1e-06, "loss": 0.051, "step": 418 }, { "clip_ratio/high_max": 0.002278532829222968, "clip_ratio/high_mean": 0.0009009390378196258, "clip_ratio/low_mean": 0.0013087997722323053, "clip_ratio/low_min": 0.0001721689832265838, "clip_ratio/region_mean": 0.0022097387409303337, "epoch": 0.039106568900244415, "grad_norm": 0.1899026483297348, "learning_rate": 1e-06, "loss": 0.016, "step": 419 }, { "clip_ratio/high_max": 0.0022739600535715, "clip_ratio/high_mean": 0.0009323005233454751, "clip_ratio/low_mean": 0.001358584140689345, "clip_ratio/low_min": 0.00012051994599460158, "clip_ratio/region_mean": 0.0022908847604412585, "epoch": 0.039199902000245, "grad_norm": 0.12367464601993561, "learning_rate": 1e-06, "loss": 0.0295, "step": 420 }, { "clip_ratio/high_max": 0.002480124225257896, "clip_ratio/high_mean": 0.0010232046297460329, "clip_ratio/low_mean": 0.0012769949244102463, "clip_ratio/low_min": 6.096029483160237e-05, "clip_ratio/region_mean": 0.0023001995505183004, "epoch": 0.039293235100245585, "grad_norm": 0.11549799144268036, "learning_rate": 1e-06, "loss": 0.0125, "step": 421 }, { "clip_ratio/high_max": 0.002469808707246557, "clip_ratio/high_mean": 0.0010399159400549252, "clip_ratio/low_mean": 0.0012084107147529721, "clip_ratio/low_min": 0.00010879743695113575, "clip_ratio/region_mean": 0.0022483267021016218, "epoch": 0.039386568200246164, "grad_norm": 0.11152950674295425, "learning_rate": 1e-06, "loss": -0.0009, "step": 422 }, { "clip_ratio/high_max": 0.0026854256211663596, "clip_ratio/high_mean": 0.0010510823885852005, "clip_ratio/low_mean": 0.0013436911594908452, "clip_ratio/low_min": 0.0001464334436604986, "clip_ratio/region_mean": 0.002394773531705141, "epoch": 0.03947990130024675, "grad_norm": 0.10431065410375595, "learning_rate": 1e-06, "loss": 0.0355, "step": 423 }, { "clip_ratio/high_max": 0.002534257189836353, "clip_ratio/high_mean": 0.00112330846968689, "clip_ratio/low_mean": 0.0013213567399361636, "clip_ratio/low_min": 0.00011886351967405062, "clip_ratio/region_mean": 0.0024446651514153928, "epoch": 0.039573234400247334, "grad_norm": 0.13028563559055328, "learning_rate": 1e-06, "loss": 0.0442, "step": 424 }, { "clip_ratio/high_max": 0.0019954144554503728, "clip_ratio/high_mean": 0.000917303295864258, "clip_ratio/low_mean": 0.0014813743509876076, "clip_ratio/low_min": 0.0001240512710865005, "clip_ratio/region_mean": 0.0023986775559023954, "epoch": 0.03966656750024792, "grad_norm": 0.12728068232536316, "learning_rate": 1e-06, "loss": 0.0585, "step": 425 }, { "clip_ratio/high_max": 0.0020989308832213283, "clip_ratio/high_mean": 0.0009160503723251168, "clip_ratio/low_mean": 0.0014201571757439524, "clip_ratio/low_min": 0.0002168270275433315, "clip_ratio/region_mean": 0.0023362075953627937, "epoch": 0.0397599006002485, "grad_norm": 0.16264601051807404, "learning_rate": 1e-06, "loss": 0.0624, "step": 426 }, { "clip_ratio/high_max": 0.0021426433195301797, "clip_ratio/high_mean": 0.000896576768354862, "clip_ratio/low_mean": 0.001322147747487179, "clip_ratio/low_min": 9.162742026092019e-05, "clip_ratio/region_mean": 0.0022187244831002317, "epoch": 0.03985323370024908, "grad_norm": 0.10477511584758759, "learning_rate": 1e-06, "loss": 0.0099, "step": 427 }, { "clip_ratio/high_max": 0.0024193348581320606, "clip_ratio/high_mean": 0.0009610278211766854, "clip_ratio/low_mean": 0.0012633223341254052, "clip_ratio/low_min": 0.0001198755899167736, "clip_ratio/region_mean": 0.0022243501516641118, "epoch": 0.03994656680024967, "grad_norm": 0.12390594184398651, "learning_rate": 1e-06, "loss": 0.0232, "step": 428 }, { "clip_ratio/high_max": 0.0020847667692578398, "clip_ratio/high_mean": 0.0009542134248476941, "clip_ratio/low_mean": 0.0011549616538104601, "clip_ratio/low_min": 0.0001373622963001253, "clip_ratio/region_mean": 0.0021091750968480483, "epoch": 0.040039899900250246, "grad_norm": 2.778806447982788, "learning_rate": 1e-06, "loss": 0.0003, "step": 429 }, { "clip_ratio/high_max": 0.0025836441564024426, "clip_ratio/high_mean": 0.0011250056813878473, "clip_ratio/low_mean": 0.0012500049233494792, "clip_ratio/low_min": 0.0001842158399085747, "clip_ratio/region_mean": 0.002375010633841157, "epoch": 0.04013323300025083, "grad_norm": 54.640708923339844, "learning_rate": 1e-06, "loss": 9.0728, "step": 430 }, { "clip_ratio/high_max": 0.001933260733494535, "clip_ratio/high_mean": 0.0008873352708178572, "clip_ratio/low_mean": 0.001147896888141986, "clip_ratio/low_min": 0.0001697036059340462, "clip_ratio/region_mean": 0.002035232144407928, "epoch": 0.04022656610025142, "grad_norm": 0.1280539333820343, "learning_rate": 1e-06, "loss": 0.0412, "step": 431 }, { "clip_ratio/high_max": 0.0023835966203478165, "clip_ratio/high_mean": 0.001047114794346271, "clip_ratio/low_mean": 0.0011902262558578514, "clip_ratio/low_min": 4.6797200411674567e-05, "clip_ratio/region_mean": 0.0022373410320142284, "epoch": 0.040319899200252, "grad_norm": 12.68848991394043, "learning_rate": 1e-06, "loss": 0.0265, "step": 432 }, { "clip_ratio/high_max": 0.002404195533017628, "clip_ratio/high_mean": 0.001040061721141683, "clip_ratio/low_mean": 0.0012366080882202368, "clip_ratio/low_min": 8.876696938386885e-05, "clip_ratio/region_mean": 0.0022766698530176654, "epoch": 0.04041323230025258, "grad_norm": 0.11501103639602661, "learning_rate": 1e-06, "loss": 0.0577, "step": 433 }, { "clip_ratio/high_max": 0.002308251685462892, "clip_ratio/high_mean": 0.0009945334040821763, "clip_ratio/low_mean": 0.0012643223672057502, "clip_ratio/low_min": 9.360061358165694e-05, "clip_ratio/region_mean": 0.0022588557621929795, "epoch": 0.040506565400253165, "grad_norm": 0.11349420994520187, "learning_rate": 1e-06, "loss": 0.0275, "step": 434 }, { "clip_ratio/high_max": 0.002837083717167843, "clip_ratio/high_mean": 0.001185728506243322, "clip_ratio/low_mean": 0.0011361503638909198, "clip_ratio/low_min": 8.328311105287867e-05, "clip_ratio/region_mean": 0.00232187890651403, "epoch": 0.04059989850025375, "grad_norm": 0.1295388787984848, "learning_rate": 1e-06, "loss": -0.0434, "step": 435 }, { "clip_ratio/high_max": 0.002334456396056339, "clip_ratio/high_mean": 0.0009760251523402985, "clip_ratio/low_mean": 0.001292510401981417, "clip_ratio/low_min": 0.00011655015623546205, "clip_ratio/region_mean": 0.0022685355215799063, "epoch": 0.040693231600254336, "grad_norm": 0.12310700118541718, "learning_rate": 1e-06, "loss": 0.0508, "step": 436 }, { "clip_ratio/high_max": 0.0022471420052170288, "clip_ratio/high_mean": 0.0009750290591910016, "clip_ratio/low_mean": 0.0013760944257228402, "clip_ratio/low_min": 9.890980982163455e-05, "clip_ratio/region_mean": 0.0023511234976467676, "epoch": 0.040786564700254914, "grad_norm": 0.12615050375461578, "learning_rate": 1e-06, "loss": 0.0437, "step": 437 }, { "clip_ratio/high_max": 0.0026431280675751623, "clip_ratio/high_mean": 0.0010137643012058106, "clip_ratio/low_mean": 0.0011139116850245045, "clip_ratio/low_min": 7.798544083925663e-05, "clip_ratio/region_mean": 0.0021276760307955556, "epoch": 0.0408798978002555, "grad_norm": 0.11678317934274673, "learning_rate": 1e-06, "loss": -0.0252, "step": 438 }, { "clip_ratio/high_max": 0.0023802153300493956, "clip_ratio/high_mean": 0.0009454761584493099, "clip_ratio/low_mean": 0.00115106920566177, "clip_ratio/low_min": 0.00011781001921917778, "clip_ratio/region_mean": 0.0020965453441021964, "epoch": 0.040973230900256084, "grad_norm": 0.1235341802239418, "learning_rate": 1e-06, "loss": -0.0053, "step": 439 }, { "clip_ratio/high_max": 0.0020673832041211426, "clip_ratio/high_mean": 0.0009385839366586879, "clip_ratio/low_mean": 0.0011467592667031568, "clip_ratio/low_min": 8.075160440057516e-05, "clip_ratio/region_mean": 0.0020853432433796115, "epoch": 0.04106656400025666, "grad_norm": 0.10878008604049683, "learning_rate": 1e-06, "loss": 0.0487, "step": 440 }, { "clip_ratio/high_max": 0.0021675955067621544, "clip_ratio/high_mean": 0.0009030082328536082, "clip_ratio/low_mean": 0.001254392587725306, "clip_ratio/low_min": 0.0001369479005006724, "clip_ratio/region_mean": 0.0021574007623712532, "epoch": 0.04115989710025725, "grad_norm": 0.10853538662195206, "learning_rate": 1e-06, "loss": 0.0433, "step": 441 }, { "clip_ratio/high_max": 0.0025476321825408377, "clip_ratio/high_mean": 0.0010242823518638033, "clip_ratio/low_mean": 0.0014408091028599301, "clip_ratio/low_min": 0.0001836762257880764, "clip_ratio/region_mean": 0.0024650913910591044, "epoch": 0.04125323020025783, "grad_norm": 0.12676697969436646, "learning_rate": 1e-06, "loss": 0.043, "step": 442 }, { "clip_ratio/high_max": 0.002140582466381602, "clip_ratio/high_mean": 0.0008623576413810952, "clip_ratio/low_mean": 0.0011736539308913052, "clip_ratio/low_min": 7.872206879255828e-05, "clip_ratio/region_mean": 0.002036011530435644, "epoch": 0.04134656330025842, "grad_norm": 0.11287907510995865, "learning_rate": 1e-06, "loss": 0.0266, "step": 443 }, { "clip_ratio/high_max": 0.002424163936666446, "clip_ratio/high_mean": 0.0010643073601386277, "clip_ratio/low_mean": 0.0010743604125309503, "clip_ratio/low_min": 4.598328723659506e-05, "clip_ratio/region_mean": 0.002138667790859472, "epoch": 0.041439896400258996, "grad_norm": 0.13857538998126984, "learning_rate": 1e-06, "loss": 0.001, "step": 444 }, { "clip_ratio/high_max": 0.002274577105708886, "clip_ratio/high_mean": 0.0009725368345243623, "clip_ratio/low_mean": 0.0010961568277707556, "clip_ratio/low_min": 0.000193777819731622, "clip_ratio/region_mean": 0.002068693662295118, "epoch": 0.04153322950025958, "grad_norm": 0.10923687368631363, "learning_rate": 1e-06, "loss": -0.002, "step": 445 }, { "clip_ratio/high_max": 0.002088048840960255, "clip_ratio/high_mean": 0.0009893544010992628, "clip_ratio/low_mean": 0.0010823304044151882, "clip_ratio/low_min": 6.492273860203568e-05, "clip_ratio/region_mean": 0.0020716848084703088, "epoch": 0.04162656260026017, "grad_norm": 0.13394789397716522, "learning_rate": 1e-06, "loss": -0.0257, "step": 446 }, { "clip_ratio/high_max": 0.002701765079109464, "clip_ratio/high_mean": 0.0010838742018677294, "clip_ratio/low_mean": 0.0011468359552964102, "clip_ratio/low_min": 0.0001046391635100008, "clip_ratio/region_mean": 0.0022307101025944576, "epoch": 0.04171989570026075, "grad_norm": 0.12949150800704956, "learning_rate": 1e-06, "loss": -0.0451, "step": 447 }, { "clip_ratio/high_max": 0.0021030796342529356, "clip_ratio/high_mean": 0.0008702351115061902, "clip_ratio/low_mean": 0.001184272587124724, "clip_ratio/low_min": 0.00022214568161871284, "clip_ratio/region_mean": 0.0020545077277347445, "epoch": 0.04181322880026133, "grad_norm": 0.11956897377967834, "learning_rate": 1e-06, "loss": 0.0506, "step": 448 }, { "clip_ratio/high_max": 0.0025427526052226312, "clip_ratio/high_mean": 0.0010415463693789206, "clip_ratio/low_mean": 0.001196772966068238, "clip_ratio/low_min": 8.376908954232931e-05, "clip_ratio/region_mean": 0.0022383193572750315, "epoch": 0.041906561900261916, "grad_norm": 0.11449374258518219, "learning_rate": 1e-06, "loss": 0.0032, "step": 449 }, { "clip_ratio/high_max": 0.0022070157356210984, "clip_ratio/high_mean": 0.0008446124647889519, "clip_ratio/low_mean": 0.0012799747637473047, "clip_ratio/low_min": 6.412282345991116e-05, "clip_ratio/region_mean": 0.0021245872412691824, "epoch": 0.0419998950002625, "grad_norm": 0.12278907746076584, "learning_rate": 1e-06, "loss": 0.0502, "step": 450 }, { "clip_ratio/high_max": 0.002613900818687398, "clip_ratio/high_mean": 0.0011630403314484283, "clip_ratio/low_mean": 0.0012179805653431686, "clip_ratio/low_min": 0.0001019046194414841, "clip_ratio/region_mean": 0.0023810209168004803, "epoch": 0.042093228100263086, "grad_norm": 0.2274618148803711, "learning_rate": 1e-06, "loss": 0.0062, "step": 451 }, { "clip_ratio/high_max": 0.0020158883817202877, "clip_ratio/high_mean": 0.0008125750391627662, "clip_ratio/low_mean": 0.0015901314181974158, "clip_ratio/low_min": 0.0001677200671110768, "clip_ratio/region_mean": 0.002402706471912097, "epoch": 0.042186561200263664, "grad_norm": 0.11661520600318909, "learning_rate": 1e-06, "loss": 0.0571, "step": 452 }, { "clip_ratio/high_max": 0.0018951612437376752, "clip_ratio/high_mean": 0.0008031379347812617, "clip_ratio/low_mean": 0.00128961973132391, "clip_ratio/low_min": 0.00017866119196696673, "clip_ratio/region_mean": 0.002092757618811447, "epoch": 0.04227989430026425, "grad_norm": 0.11419399082660675, "learning_rate": 1e-06, "loss": 0.0527, "step": 453 }, { "clip_ratio/high_max": 0.0019802157912636176, "clip_ratio/high_mean": 0.0007942871070554247, "clip_ratio/low_mean": 0.0010927617076958995, "clip_ratio/low_min": 6.191489092088887e-05, "clip_ratio/region_mean": 0.0018870487765525468, "epoch": 0.042373227400264835, "grad_norm": 0.11077532172203064, "learning_rate": 1e-06, "loss": 0.0433, "step": 454 }, { "clip_ratio/high_max": 0.002080933227261994, "clip_ratio/high_mean": 0.0008692070241522742, "clip_ratio/low_mean": 0.0013762066446361132, "clip_ratio/low_min": 0.0002110941550199641, "clip_ratio/region_mean": 0.0022454136924352497, "epoch": 0.04246656050026541, "grad_norm": 0.11451329290866852, "learning_rate": 1e-06, "loss": 0.0698, "step": 455 }, { "clip_ratio/high_max": 0.0028301770871621557, "clip_ratio/high_mean": 0.0011058487980335485, "clip_ratio/low_mean": 0.001249912013008725, "clip_ratio/low_min": 0.00019412276742514223, "clip_ratio/region_mean": 0.002355760763748549, "epoch": 0.042559893600266, "grad_norm": 0.12839896976947784, "learning_rate": 1e-06, "loss": -0.0184, "step": 456 }, { "clip_ratio/high_max": 0.0021596031438093632, "clip_ratio/high_mean": 0.0009629893083911156, "clip_ratio/low_mean": 0.0011259939365118043, "clip_ratio/low_min": 9.444024453841848e-05, "clip_ratio/region_mean": 0.0020889832885586657, "epoch": 0.04265322670026658, "grad_norm": 0.2612585127353668, "learning_rate": 1e-06, "loss": 0.0138, "step": 457 }, { "clip_ratio/high_max": 0.002343076281249523, "clip_ratio/high_mean": 0.001028607028274564, "clip_ratio/low_mean": 0.0012364083959255368, "clip_ratio/low_min": 0.00013410425890469924, "clip_ratio/region_mean": 0.002265015384182334, "epoch": 0.04274655980026717, "grad_norm": 0.12061664462089539, "learning_rate": 1e-06, "loss": 0.0146, "step": 458 }, { "clip_ratio/high_max": 0.002508892248442862, "clip_ratio/high_mean": 0.001076467382517876, "clip_ratio/low_mean": 0.0012913583013869356, "clip_ratio/low_min": 7.221037594717927e-05, "clip_ratio/region_mean": 0.002367825662076939, "epoch": 0.04283989290026775, "grad_norm": 0.1079101711511612, "learning_rate": 1e-06, "loss": 0.0137, "step": 459 }, { "clip_ratio/high_max": 0.0024721603622310795, "clip_ratio/high_mean": 0.000982174722594209, "clip_ratio/low_mean": 0.0012021498550893739, "clip_ratio/low_min": 4.907491256744834e-05, "clip_ratio/region_mean": 0.002184324592235498, "epoch": 0.04293322600026833, "grad_norm": 0.11388076841831207, "learning_rate": 1e-06, "loss": 0.0248, "step": 460 }, { "clip_ratio/high_max": 0.0021407990498119034, "clip_ratio/high_mean": 0.0008957698009908199, "clip_ratio/low_mean": 0.0012666996371990535, "clip_ratio/low_min": 0.00021342346644814825, "clip_ratio/region_mean": 0.002162469369068276, "epoch": 0.04302655910026892, "grad_norm": 0.10794167965650558, "learning_rate": 1e-06, "loss": 0.0517, "step": 461 }, { "clip_ratio/high_max": 0.0022121062866062857, "clip_ratio/high_mean": 0.00101082428227528, "clip_ratio/low_mean": 0.0013233691024652217, "clip_ratio/low_min": 0.00013778938136965735, "clip_ratio/region_mean": 0.0023341934065683745, "epoch": 0.0431198922002695, "grad_norm": 0.12797607481479645, "learning_rate": 1e-06, "loss": 0.0713, "step": 462 }, { "clip_ratio/high_max": 0.002486055782355834, "clip_ratio/high_mean": 0.0010715318439906696, "clip_ratio/low_mean": 0.0009811017098400043, "clip_ratio/low_min": 8.93899659786257e-05, "clip_ratio/region_mean": 0.002052633608400356, "epoch": 0.04321322530027008, "grad_norm": 0.11894649267196655, "learning_rate": 1e-06, "loss": 0.0126, "step": 463 }, { "clip_ratio/high_max": 0.0019702802383108065, "clip_ratio/high_mean": 0.0009924004334607162, "clip_ratio/low_mean": 0.001027456475640065, "clip_ratio/low_min": 9.611607947590528e-05, "clip_ratio/region_mean": 0.0020198568963678554, "epoch": 0.043306558400270666, "grad_norm": 0.12023472040891647, "learning_rate": 1e-06, "loss": -0.0043, "step": 464 }, { "clip_ratio/high_max": 0.002276383886055555, "clip_ratio/high_mean": 0.0009723977236717474, "clip_ratio/low_mean": 0.0012431670438672882, "clip_ratio/low_min": 0.00010941943310172064, "clip_ratio/region_mean": 0.002215564774814993, "epoch": 0.04339989150027125, "grad_norm": 0.11027390509843826, "learning_rate": 1e-06, "loss": 0.0166, "step": 465 }, { "clip_ratio/high_max": 0.00218901781772729, "clip_ratio/high_mean": 0.0009813891720114043, "clip_ratio/low_mean": 0.0011943020799662918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021756912537966855, "epoch": 0.04349322460027183, "grad_norm": 0.12403442710638046, "learning_rate": 1e-06, "loss": -0.0306, "step": 466 }, { "clip_ratio/high_max": 0.0026907776336884126, "clip_ratio/high_mean": 0.001170337083749473, "clip_ratio/low_mean": 0.0011795625832746737, "clip_ratio/low_min": 7.412663671857445e-05, "clip_ratio/region_mean": 0.0023498997106798925, "epoch": 0.043586557700272414, "grad_norm": 0.23572413623332977, "learning_rate": 1e-06, "loss": -0.0222, "step": 467 }, { "clip_ratio/high_max": 0.0020260073797544464, "clip_ratio/high_mean": 0.000941921076446306, "clip_ratio/low_mean": 0.0010628185209498042, "clip_ratio/low_min": 0.00017466972349211574, "clip_ratio/region_mean": 0.0020047395591973327, "epoch": 0.043679890800273, "grad_norm": 0.11597782373428345, "learning_rate": 1e-06, "loss": 0.075, "step": 468 }, { "clip_ratio/high_max": 0.0023676328928559087, "clip_ratio/high_mean": 0.0010208651328866836, "clip_ratio/low_mean": 0.0011701043204084272, "clip_ratio/low_min": 0.00025888476557156537, "clip_ratio/region_mean": 0.0021909694551141, "epoch": 0.043773223900273585, "grad_norm": 0.12010545283555984, "learning_rate": 1e-06, "loss": 0.0392, "step": 469 }, { "clip_ratio/high_max": 0.0024410178084508516, "clip_ratio/high_mean": 0.0010038916698249523, "clip_ratio/low_mean": 0.001297725255426485, "clip_ratio/low_min": 8.251314466178883e-05, "clip_ratio/region_mean": 0.0023016169216134585, "epoch": 0.04386655700027416, "grad_norm": 0.11060360819101334, "learning_rate": 1e-06, "loss": 0.056, "step": 470 }, { "clip_ratio/high_max": 0.0023246165656019002, "clip_ratio/high_mean": 0.0010427327506477013, "clip_ratio/low_mean": 0.0011812723314506002, "clip_ratio/low_min": 0.00010678233047656249, "clip_ratio/region_mean": 0.0022240050457185134, "epoch": 0.04395989010027475, "grad_norm": 0.13037686049938202, "learning_rate": 1e-06, "loss": 0.0097, "step": 471 }, { "clip_ratio/high_max": 0.0019807315911748447, "clip_ratio/high_mean": 0.000886119763890747, "clip_ratio/low_mean": 0.0011592583723540884, "clip_ratio/low_min": 0.000134061661810847, "clip_ratio/region_mean": 0.0020453781107789837, "epoch": 0.044053223200275334, "grad_norm": 0.11048023402690887, "learning_rate": 1e-06, "loss": 0.0296, "step": 472 }, { "clip_ratio/high_max": 0.0021982141843182035, "clip_ratio/high_mean": 0.0008661290266900323, "clip_ratio/low_mean": 0.0012787375053449068, "clip_ratio/low_min": 8.145925312419422e-05, "clip_ratio/region_mean": 0.0021448665211210027, "epoch": 0.04414655630027592, "grad_norm": 0.12781262397766113, "learning_rate": 1e-06, "loss": 0.0295, "step": 473 }, { "clip_ratio/high_max": 0.0025377973179274704, "clip_ratio/high_mean": 0.0010488300358701963, "clip_ratio/low_mean": 0.0012911906815133989, "clip_ratio/low_min": 0.00013619333185488358, "clip_ratio/region_mean": 0.0023400206773658283, "epoch": 0.0442398894002765, "grad_norm": 0.11543948948383331, "learning_rate": 1e-06, "loss": 0.0471, "step": 474 }, { "clip_ratio/high_max": 0.002485527344106231, "clip_ratio/high_mean": 0.000976199104115949, "clip_ratio/low_mean": 0.0011549007213034201, "clip_ratio/low_min": 7.295667455764487e-05, "clip_ratio/region_mean": 0.0021310998490662314, "epoch": 0.04433322250027708, "grad_norm": 0.10828111320734024, "learning_rate": 1e-06, "loss": 0.0199, "step": 475 }, { "clip_ratio/high_max": 0.0022357157067744993, "clip_ratio/high_mean": 0.0009449659264646471, "clip_ratio/low_mean": 0.0012177722055639606, "clip_ratio/low_min": 7.049992518659565e-05, "clip_ratio/region_mean": 0.002162738106562756, "epoch": 0.04442655560027767, "grad_norm": 0.1449502855539322, "learning_rate": 1e-06, "loss": 0.0198, "step": 476 }, { "clip_ratio/high_max": 0.0023715003990218975, "clip_ratio/high_mean": 0.0009669085684436141, "clip_ratio/low_mean": 0.0009724647843540879, "clip_ratio/low_min": 8.651555435790215e-05, "clip_ratio/region_mean": 0.0019393733091419563, "epoch": 0.04451988870027825, "grad_norm": 0.10657148063182831, "learning_rate": 1e-06, "loss": 0.0291, "step": 477 }, { "clip_ratio/high_max": 0.002561667024565395, "clip_ratio/high_mean": 0.0010363210276409518, "clip_ratio/low_mean": 0.0009192878987960285, "clip_ratio/low_min": 7.535339773312444e-05, "clip_ratio/region_mean": 0.001955608939169906, "epoch": 0.04461322180027883, "grad_norm": 0.11485034227371216, "learning_rate": 1e-06, "loss": -0.0229, "step": 478 }, { "clip_ratio/high_max": 0.0024154626407835167, "clip_ratio/high_mean": 0.001086388569092378, "clip_ratio/low_mean": 0.0012085107864550082, "clip_ratio/low_min": 0.00012855926979682408, "clip_ratio/region_mean": 0.00229489931371063, "epoch": 0.044706554900279416, "grad_norm": 0.13590556383132935, "learning_rate": 1e-06, "loss": 0.0149, "step": 479 }, { "clip_ratio/high_max": 0.0021419750337372534, "clip_ratio/high_mean": 0.0009179272092296742, "clip_ratio/low_mean": 0.001058829093381064, "clip_ratio/low_min": 0.0002057566998701077, "clip_ratio/region_mean": 0.001976756306248717, "epoch": 0.04479988800028, "grad_norm": 0.11014196276664734, "learning_rate": 1e-06, "loss": 0.0458, "step": 480 }, { "clip_ratio/high_max": 0.0068283693108242005, "clip_ratio/high_mean": 0.003388633020222187, "clip_ratio/low_mean": 0.0025814563559833914, "clip_ratio/low_min": 0.00017360274796374142, "clip_ratio/region_mean": 0.005970089347101748, "completions/clipped_ratio": 0.013846261160714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 612.9391479492188, "completions/mean_terminated_length": 564.0345458984375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.04489322110028058, "grad_norm": 0.4843619465827942, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 407094918.0, "reward": 0.5883702039718628, "reward_std": 0.19560304284095764, "rewards/simpleverify_reward/mean": 0.5883702039718628, "rewards/simpleverify_reward/std": 0.4921310544013977, "step": 481 }, { "clip_ratio/high_max": 0.0033458515972597525, "clip_ratio/high_mean": 0.0013139616130501963, "clip_ratio/low_mean": 0.0015563023334834725, "clip_ratio/low_min": 5.296991093928227e-05, "clip_ratio/region_mean": 0.0028702639101538807, "epoch": 0.044986554200281165, "grad_norm": 2527.047607421875, "learning_rate": 1e-06, "loss": 1.4222, "step": 482 }, { "clip_ratio/high_max": 0.0027680123457685113, "clip_ratio/high_mean": 0.0011017267388524488, "clip_ratio/low_mean": 0.0016614682244835421, "clip_ratio/low_min": 0.00014853656830382533, "clip_ratio/region_mean": 0.0027631948614725843, "epoch": 0.04507988730028175, "grad_norm": 510.69989013671875, "learning_rate": 1e-06, "loss": 0.6028, "step": 483 }, { "clip_ratio/high_max": 0.0031768308181199245, "clip_ratio/high_mean": 0.0011142092862428399, "clip_ratio/low_mean": 0.0014178931924107019, "clip_ratio/low_min": 0.00019267274910816923, "clip_ratio/region_mean": 0.002532102502300404, "epoch": 0.045173220400282335, "grad_norm": 0.23238655924797058, "learning_rate": 1e-06, "loss": 0.0192, "step": 484 }, { "clip_ratio/high_max": 0.002577004437625874, "clip_ratio/high_mean": 0.0011110767445643432, "clip_ratio/low_mean": 0.0013797566316497978, "clip_ratio/low_min": 0.00021669562556780875, "clip_ratio/region_mean": 0.0024908332779887132, "epoch": 0.04526655350028291, "grad_norm": 0.10628186166286469, "learning_rate": 1e-06, "loss": 0.0635, "step": 485 }, { "clip_ratio/high_max": 0.0025845861964626238, "clip_ratio/high_mean": 0.0010997064418916125, "clip_ratio/low_mean": 0.0016606345343461726, "clip_ratio/low_min": 0.00026107554003829136, "clip_ratio/region_mean": 0.002760341048997361, "epoch": 0.0453598866002835, "grad_norm": 0.8131551146507263, "learning_rate": 1e-06, "loss": 0.0532, "step": 486 }, { "clip_ratio/high_max": 0.0024196689555537887, "clip_ratio/high_mean": 0.0010397553087386768, "clip_ratio/low_mean": 0.0012581970731844194, "clip_ratio/low_min": 0.00016405338919867063, "clip_ratio/region_mean": 0.0022979523564572446, "epoch": 0.045453219700284084, "grad_norm": 0.23600368201732635, "learning_rate": 1e-06, "loss": 0.0385, "step": 487 }, { "clip_ratio/high_max": 0.002515620188205503, "clip_ratio/high_mean": 0.000991558645182522, "clip_ratio/low_mean": 0.0012843496406276245, "clip_ratio/low_min": 6.644496625085594e-05, "clip_ratio/region_mean": 0.0022759083367418498, "epoch": 0.04554655280028467, "grad_norm": 0.1144188642501831, "learning_rate": 1e-06, "loss": 0.0411, "step": 488 }, { "clip_ratio/high_max": 0.0024526447887183167, "clip_ratio/high_mean": 0.0010472735157236457, "clip_ratio/low_mean": 0.0012560411632875912, "clip_ratio/low_min": 4.429894033819437e-05, "clip_ratio/region_mean": 0.002303314679011237, "epoch": 0.04563988590028525, "grad_norm": 0.11381571739912033, "learning_rate": 1e-06, "loss": 0.0248, "step": 489 }, { "clip_ratio/high_max": 0.0026885835322900675, "clip_ratio/high_mean": 0.0009901262747007422, "clip_ratio/low_mean": 0.0013964284589746967, "clip_ratio/low_min": 0.00018861614807974547, "clip_ratio/region_mean": 0.0023865547045716085, "epoch": 0.04573321900028583, "grad_norm": 0.11780530214309692, "learning_rate": 1e-06, "loss": 0.0414, "step": 490 }, { "clip_ratio/high_max": 0.002672574919415638, "clip_ratio/high_mean": 0.0010306015974492766, "clip_ratio/low_mean": 0.0012131199691793881, "clip_ratio/low_min": 0.00020800485071958974, "clip_ratio/region_mean": 0.002243721595732495, "epoch": 0.04582655210028642, "grad_norm": 0.11309982091188431, "learning_rate": 1e-06, "loss": 0.0027, "step": 491 }, { "clip_ratio/high_max": 0.0028789570496883243, "clip_ratio/high_mean": 0.001139427004090976, "clip_ratio/low_mean": 0.0011789236341428477, "clip_ratio/low_min": 0.00014193737115419935, "clip_ratio/region_mean": 0.002318350634595845, "epoch": 0.045919885200286996, "grad_norm": 0.114866241812706, "learning_rate": 1e-06, "loss": 0.0027, "step": 492 }, { "clip_ratio/high_max": 0.0023531435454060556, "clip_ratio/high_mean": 0.0010862128874578048, "clip_ratio/low_mean": 0.0010686217556212796, "clip_ratio/low_min": 5.942418283666484e-05, "clip_ratio/region_mean": 0.002154834663087968, "epoch": 0.04601321830028758, "grad_norm": 0.10684971511363983, "learning_rate": 1e-06, "loss": 0.0099, "step": 493 }, { "clip_ratio/high_max": 0.002385315779974917, "clip_ratio/high_mean": 0.0010112362306244904, "clip_ratio/low_mean": 0.0012513366837083595, "clip_ratio/low_min": 0.00010183455015067011, "clip_ratio/region_mean": 0.002262572896142956, "epoch": 0.046106551400288166, "grad_norm": 118.60794830322266, "learning_rate": 1e-06, "loss": 0.0764, "step": 494 }, { "clip_ratio/high_max": 0.002378626901190728, "clip_ratio/high_mean": 0.0010235946356260683, "clip_ratio/low_mean": 0.0013515273640223313, "clip_ratio/low_min": 0.00021047444624855416, "clip_ratio/region_mean": 0.002375121955992654, "epoch": 0.04619988450028875, "grad_norm": 0.1452789604663849, "learning_rate": 1e-06, "loss": 0.0794, "step": 495 }, { "clip_ratio/high_max": 0.0028749601187882945, "clip_ratio/high_mean": 0.0012039989815093577, "clip_ratio/low_mean": 0.0013720227125304518, "clip_ratio/low_min": 8.736413110455032e-05, "clip_ratio/region_mean": 0.0025760217758943327, "epoch": 0.04629321760028933, "grad_norm": 0.17568452656269073, "learning_rate": 1e-06, "loss": -0.0116, "step": 496 }, { "clip_ratio/high_max": 0.0026140173504245467, "clip_ratio/high_mean": 0.0010809658142534317, "clip_ratio/low_mean": 0.0011473041122371797, "clip_ratio/low_min": 4.950197217112873e-05, "clip_ratio/region_mean": 0.0022282698628259823, "epoch": 0.046386550700289915, "grad_norm": 0.11483661830425262, "learning_rate": 1e-06, "loss": -0.0022, "step": 497 }, { "clip_ratio/high_max": 0.0028459546956582926, "clip_ratio/high_mean": 0.0012019352398056071, "clip_ratio/low_mean": 0.0010250484356220113, "clip_ratio/low_min": 8.880574750946835e-05, "clip_ratio/region_mean": 0.0022269836408668198, "epoch": 0.0464798838002905, "grad_norm": 0.11611884087324142, "learning_rate": 1e-06, "loss": -0.0317, "step": 498 }, { "clip_ratio/high_max": 0.002179981798690278, "clip_ratio/high_mean": 0.0009535617282381281, "clip_ratio/low_mean": 0.0010598306198517093, "clip_ratio/low_min": 7.751261091470951e-05, "clip_ratio/region_mean": 0.0020133923389948905, "epoch": 0.046573216900291085, "grad_norm": 0.11167261004447937, "learning_rate": 1e-06, "loss": 0.0363, "step": 499 }, { "clip_ratio/high_max": 0.002407583175227046, "clip_ratio/high_mean": 0.0010839545211638324, "clip_ratio/low_mean": 0.0012218191659485456, "clip_ratio/low_min": 8.460964272671845e-05, "clip_ratio/region_mean": 0.0023057736252667382, "epoch": 0.046666550000291664, "grad_norm": 0.11501043289899826, "learning_rate": 1e-06, "loss": 0.0172, "step": 500 }, { "clip_ratio/high_max": 0.0020014568872284144, "clip_ratio/high_mean": 0.0009790081167011522, "clip_ratio/low_mean": 0.0011973440450674389, "clip_ratio/low_min": 0.0001430540214641951, "clip_ratio/region_mean": 0.0021763521581306122, "epoch": 0.04675988310029225, "grad_norm": 0.12052631378173828, "learning_rate": 1e-06, "loss": 0.0284, "step": 501 }, { "clip_ratio/high_max": 0.0025867209405987523, "clip_ratio/high_mean": 0.0011445445288700284, "clip_ratio/low_mean": 0.0013901802085456438, "clip_ratio/low_min": 8.16054980532499e-05, "clip_ratio/region_mean": 0.0025347247283207253, "epoch": 0.046853216200292834, "grad_norm": 4.328678131103516, "learning_rate": 1e-06, "loss": 0.0247, "step": 502 }, { "clip_ratio/high_max": 0.0025174898582918104, "clip_ratio/high_mean": 0.001032432317515486, "clip_ratio/low_mean": 0.0008984917385532754, "clip_ratio/low_min": 5.683271956513636e-05, "clip_ratio/region_mean": 0.0019309240306029096, "epoch": 0.04694654930029342, "grad_norm": 0.10489113628864288, "learning_rate": 1e-06, "loss": -0.0039, "step": 503 }, { "clip_ratio/high_max": 0.002338741433050018, "clip_ratio/high_mean": 0.0008725345323910005, "clip_ratio/low_mean": 0.0011274783901171759, "clip_ratio/low_min": 0.00018113549776899163, "clip_ratio/region_mean": 0.002000012915232219, "epoch": 0.047039882400294, "grad_norm": 0.13507111370563507, "learning_rate": 1e-06, "loss": 0.03, "step": 504 }, { "clip_ratio/high_max": 0.0021831087942700833, "clip_ratio/high_mean": 0.0008164533282979392, "clip_ratio/low_mean": 0.0011655713606160134, "clip_ratio/low_min": 8.811126099317335e-05, "clip_ratio/region_mean": 0.0019820246525341645, "epoch": 0.04713321550029458, "grad_norm": 0.10306805372238159, "learning_rate": 1e-06, "loss": 0.0535, "step": 505 }, { "clip_ratio/high_max": 0.002360138860240113, "clip_ratio/high_mean": 0.000960228875555913, "clip_ratio/low_mean": 0.0010687160483939806, "clip_ratio/low_min": 5.986460109852487e-05, "clip_ratio/region_mean": 0.002028944953053724, "epoch": 0.04722654860029517, "grad_norm": 0.10228810459375381, "learning_rate": 1e-06, "loss": -0.0224, "step": 506 }, { "clip_ratio/high_max": 0.002650081616593525, "clip_ratio/high_mean": 0.0011420666924095713, "clip_ratio/low_mean": 0.0011115521429019282, "clip_ratio/low_min": 7.888176514825318e-05, "clip_ratio/region_mean": 0.0022536188407684676, "epoch": 0.047319881700295746, "grad_norm": 0.12124821543693542, "learning_rate": 1e-06, "loss": 0.0159, "step": 507 }, { "clip_ratio/high_max": 0.0019330538270878606, "clip_ratio/high_mean": 0.0008142490733007435, "clip_ratio/low_mean": 0.0013981367701489944, "clip_ratio/low_min": 5.3297328122425824e-05, "clip_ratio/region_mean": 0.002212385821621865, "epoch": 0.04741321480029633, "grad_norm": 1.066062569618225, "learning_rate": 1e-06, "loss": 0.0949, "step": 508 }, { "clip_ratio/high_max": 0.002206900480814511, "clip_ratio/high_mean": 0.0009112868629017612, "clip_ratio/low_mean": 0.0013230906952230725, "clip_ratio/low_min": 0.00019779235844907816, "clip_ratio/region_mean": 0.002234377578133717, "epoch": 0.04750654790029692, "grad_norm": 0.130077064037323, "learning_rate": 1e-06, "loss": 0.0827, "step": 509 }, { "clip_ratio/high_max": 0.002234059527836507, "clip_ratio/high_mean": 0.0009583986720826942, "clip_ratio/low_mean": 0.0011260432984272484, "clip_ratio/low_min": 3.3414446079405025e-05, "clip_ratio/region_mean": 0.002084441963233985, "epoch": 0.0475998810002975, "grad_norm": 0.10715559870004654, "learning_rate": 1e-06, "loss": 0.033, "step": 510 }, { "clip_ratio/high_max": 0.002303210807440337, "clip_ratio/high_mean": 0.000927604574826546, "clip_ratio/low_mean": 0.0012452247865439858, "clip_ratio/low_min": 0.00013492452580976533, "clip_ratio/region_mean": 0.002172829343180638, "epoch": 0.04769321410029808, "grad_norm": 0.11483106017112732, "learning_rate": 1e-06, "loss": 0.0212, "step": 511 }, { "clip_ratio/high_max": 0.0025578508502803743, "clip_ratio/high_mean": 0.0010219466203125194, "clip_ratio/low_mean": 0.0012588810750457924, "clip_ratio/low_min": 8.451135454379255e-05, "clip_ratio/region_mean": 0.0022808276626165025, "epoch": 0.047786547200298665, "grad_norm": 0.1137489378452301, "learning_rate": 1e-06, "loss": 0.0596, "step": 512 }, { "clip_ratio/high_max": 0.002436665912682656, "clip_ratio/high_mean": 0.0010937957777059637, "clip_ratio/low_mean": 0.0014357199070218485, "clip_ratio/low_min": 0.0001584622377777123, "clip_ratio/region_mean": 0.0025295156956417486, "epoch": 0.04787988030029925, "grad_norm": 0.12277857214212418, "learning_rate": 1e-06, "loss": 0.0135, "step": 513 }, { "clip_ratio/high_max": 0.0025481016127741896, "clip_ratio/high_mean": 0.0010744818464445416, "clip_ratio/low_mean": 0.001207023422466591, "clip_ratio/low_min": 1.9942564904340543e-05, "clip_ratio/region_mean": 0.0022815053089288995, "epoch": 0.047973213400299836, "grad_norm": 0.11549944430589676, "learning_rate": 1e-06, "loss": -0.0156, "step": 514 }, { "clip_ratio/high_max": 0.0024392907944275066, "clip_ratio/high_mean": 0.0010153075454581995, "clip_ratio/low_mean": 0.0011312407295918092, "clip_ratio/low_min": 0.00010762080364656867, "clip_ratio/region_mean": 0.0021465483514475636, "epoch": 0.048066546500300414, "grad_norm": 0.11370525509119034, "learning_rate": 1e-06, "loss": 0.0134, "step": 515 }, { "clip_ratio/high_max": 0.0022776698897359893, "clip_ratio/high_mean": 0.0009391976673214231, "clip_ratio/low_mean": 0.0009749795717652887, "clip_ratio/low_min": 8.09999655757565e-05, "clip_ratio/region_mean": 0.0019141772427246906, "epoch": 0.048159879600301, "grad_norm": 0.11053760349750519, "learning_rate": 1e-06, "loss": 0.0399, "step": 516 }, { "clip_ratio/high_max": 0.0022873763227835298, "clip_ratio/high_mean": 0.0008552894523745636, "clip_ratio/low_mean": 0.0012872849547420628, "clip_ratio/low_min": 0.00017003757238853723, "clip_ratio/region_mean": 0.00214257442712551, "epoch": 0.048253212700301584, "grad_norm": 0.11593953520059586, "learning_rate": 1e-06, "loss": 0.0414, "step": 517 }, { "clip_ratio/high_max": 0.0022689488650939893, "clip_ratio/high_mean": 0.0008752079093028442, "clip_ratio/low_mean": 0.0010455894152983092, "clip_ratio/low_min": 7.386402103293221e-05, "clip_ratio/region_mean": 0.0019207973018637858, "epoch": 0.04834654580030216, "grad_norm": 0.10284002870321274, "learning_rate": 1e-06, "loss": 0.0483, "step": 518 }, { "clip_ratio/high_max": 0.0022463795576186385, "clip_ratio/high_mean": 0.000922795285077882, "clip_ratio/low_mean": 0.0012132966548961122, "clip_ratio/low_min": 0.00017335863049083855, "clip_ratio/region_mean": 0.002136091956344899, "epoch": 0.04843987890030275, "grad_norm": 0.15327061712741852, "learning_rate": 1e-06, "loss": 0.0369, "step": 519 }, { "clip_ratio/high_max": 0.002545334617025219, "clip_ratio/high_mean": 0.001126107537857024, "clip_ratio/low_mean": 0.0012021581442240858, "clip_ratio/low_min": 6.505160035885638e-05, "clip_ratio/region_mean": 0.0023282656766241416, "epoch": 0.04853321200030333, "grad_norm": 0.133977010846138, "learning_rate": 1e-06, "loss": 0.0609, "step": 520 }, { "clip_ratio/high_max": 0.002465743396896869, "clip_ratio/high_mean": 0.0009910798908094876, "clip_ratio/low_mean": 0.0011964750337938312, "clip_ratio/low_min": 0.00012408259044605074, "clip_ratio/region_mean": 0.002187554935517255, "epoch": 0.04862654510030392, "grad_norm": 0.14651188254356384, "learning_rate": 1e-06, "loss": 0.0347, "step": 521 }, { "clip_ratio/high_max": 0.0022333893866743892, "clip_ratio/high_mean": 0.0009058175401150947, "clip_ratio/low_mean": 0.0012181241108919494, "clip_ratio/low_min": 6.566829506482463e-05, "clip_ratio/region_mean": 0.0021239416673779488, "epoch": 0.048719878200304496, "grad_norm": 0.11617682129144669, "learning_rate": 1e-06, "loss": 0.0752, "step": 522 }, { "clip_ratio/high_max": 0.0025374851829838008, "clip_ratio/high_mean": 0.001010436229989864, "clip_ratio/low_mean": 0.0013953620236861752, "clip_ratio/low_min": 8.938389146351255e-05, "clip_ratio/region_mean": 0.0024057982009253465, "epoch": 0.04881321130030508, "grad_norm": 0.14823086559772491, "learning_rate": 1e-06, "loss": 0.0201, "step": 523 }, { "clip_ratio/high_max": 0.0025780609212233685, "clip_ratio/high_mean": 0.0010427060842630453, "clip_ratio/low_mean": 0.0010623884554661345, "clip_ratio/low_min": 1.6473379218950868e-05, "clip_ratio/region_mean": 0.0021050945433671586, "epoch": 0.04890654440030567, "grad_norm": 0.11438409239053726, "learning_rate": 1e-06, "loss": 0.021, "step": 524 }, { "clip_ratio/high_max": 0.0020961795817129314, "clip_ratio/high_mean": 0.0008928020452003693, "clip_ratio/low_mean": 0.0010564436197455507, "clip_ratio/low_min": 1.8606728190206923e-05, "clip_ratio/region_mean": 0.0019492456849548034, "epoch": 0.04899987750030625, "grad_norm": 1.1766196489334106, "learning_rate": 1e-06, "loss": 0.0741, "step": 525 }, { "clip_ratio/high_max": 0.002311453288712073, "clip_ratio/high_mean": 0.00102697366673965, "clip_ratio/low_mean": 0.00102821009568288, "clip_ratio/low_min": 0.00010042948633781634, "clip_ratio/region_mean": 0.0020551837733364664, "epoch": 0.04909321060030683, "grad_norm": 0.12878233194351196, "learning_rate": 1e-06, "loss": 0.0103, "step": 526 }, { "clip_ratio/high_max": 0.002335183755349135, "clip_ratio/high_mean": 0.0009238782386091771, "clip_ratio/low_mean": 0.0010043288602901157, "clip_ratio/low_min": 0.00013413556189334486, "clip_ratio/region_mean": 0.0019282071370980702, "epoch": 0.049186543700307415, "grad_norm": 0.10969037562608719, "learning_rate": 1e-06, "loss": 0.034, "step": 527 }, { "clip_ratio/high_max": 0.0019988704880233854, "clip_ratio/high_mean": 0.0008094976255961228, "clip_ratio/low_mean": 0.0010959244318655692, "clip_ratio/low_min": 0.00018863374862121418, "clip_ratio/region_mean": 0.0019054220610996708, "epoch": 0.049279876800308, "grad_norm": 0.11354716867208481, "learning_rate": 1e-06, "loss": 0.074, "step": 528 }, { "clip_ratio/high_max": 0.002741457319643814, "clip_ratio/high_mean": 0.0010872539205593057, "clip_ratio/low_mean": 0.0010173525843129028, "clip_ratio/low_min": 2.862017117877258e-05, "clip_ratio/region_mean": 0.002104606493958272, "epoch": 0.049373209900308586, "grad_norm": 0.11658629029989243, "learning_rate": 1e-06, "loss": 0.0483, "step": 529 }, { "clip_ratio/high_max": 0.0021164071513339877, "clip_ratio/high_mean": 0.0009590825029590633, "clip_ratio/low_mean": 0.0013941892648290377, "clip_ratio/low_min": 0.00010167742766498122, "clip_ratio/region_mean": 0.002353271731408313, "epoch": 0.049466543000309164, "grad_norm": 0.1637180894613266, "learning_rate": 1e-06, "loss": 0.0355, "step": 530 }, { "clip_ratio/high_max": 0.0027464583254186437, "clip_ratio/high_mean": 0.0011867928405990824, "clip_ratio/low_mean": 0.0008808790043985937, "clip_ratio/low_min": 6.548280634888215e-05, "clip_ratio/region_mean": 0.002067671804979909, "epoch": 0.04955987610030975, "grad_norm": 0.11159728467464447, "learning_rate": 1e-06, "loss": -0.0175, "step": 531 }, { "clip_ratio/high_max": 0.002653493869729573, "clip_ratio/high_mean": 0.0011114626468042843, "clip_ratio/low_mean": 0.0010718940557126189, "clip_ratio/low_min": 9.245149885828141e-05, "clip_ratio/region_mean": 0.0021833567661815323, "epoch": 0.049653209200310335, "grad_norm": 0.12469938397407532, "learning_rate": 1e-06, "loss": 0.0376, "step": 532 }, { "clip_ratio/high_max": 0.0023215410656121094, "clip_ratio/high_mean": 0.0010041421992355026, "clip_ratio/low_mean": 0.0011800426000263542, "clip_ratio/low_min": 0.00010731277052400401, "clip_ratio/region_mean": 0.002184184799261857, "epoch": 0.04974654230031091, "grad_norm": 0.12142664194107056, "learning_rate": 1e-06, "loss": 0.0277, "step": 533 }, { "clip_ratio/high_max": 0.00217046843317803, "clip_ratio/high_mean": 0.0009255408476747107, "clip_ratio/low_mean": 0.001085233847334166, "clip_ratio/low_min": 9.088732167583657e-05, "clip_ratio/region_mean": 0.0020107746859139297, "epoch": 0.0498398754003115, "grad_norm": 0.11844430863857269, "learning_rate": 1e-06, "loss": 0.0502, "step": 534 }, { "clip_ratio/high_max": 0.002323992252058815, "clip_ratio/high_mean": 0.0009733416554809082, "clip_ratio/low_mean": 0.0011069518513977528, "clip_ratio/low_min": 0.00010025935262092389, "clip_ratio/region_mean": 0.0020802935177925974, "epoch": 0.04993320850031208, "grad_norm": 0.10840032994747162, "learning_rate": 1e-06, "loss": 0.0216, "step": 535 }, { "clip_ratio/high_max": 0.0024229628907050937, "clip_ratio/high_mean": 0.001014901803500834, "clip_ratio/low_mean": 0.001073445975634968, "clip_ratio/low_min": 5.582950143434573e-05, "clip_ratio/region_mean": 0.002088347806420643, "epoch": 0.05002654160031267, "grad_norm": 0.14263685047626495, "learning_rate": 1e-06, "loss": 0.0268, "step": 536 }, { "clip_ratio/high_max": 0.0021586509901680984, "clip_ratio/high_mean": 0.0009574469113431405, "clip_ratio/low_mean": 0.0010398111735412385, "clip_ratio/low_min": 0.00016133830831677187, "clip_ratio/region_mean": 0.0019972581139882095, "epoch": 0.05011987470031325, "grad_norm": 0.12401726841926575, "learning_rate": 1e-06, "loss": 0.0508, "step": 537 }, { "clip_ratio/high_max": 0.0022127531774458475, "clip_ratio/high_mean": 0.0008974480588221923, "clip_ratio/low_mean": 0.0009581594604242127, "clip_ratio/low_min": 8.482113571517402e-05, "clip_ratio/region_mean": 0.0018556075301603414, "epoch": 0.05021320780031383, "grad_norm": 0.3128924071788788, "learning_rate": 1e-06, "loss": 0.0327, "step": 538 }, { "clip_ratio/high_max": 0.0024360563547816128, "clip_ratio/high_mean": 0.0009754931506904541, "clip_ratio/low_mean": 0.0009930350197464577, "clip_ratio/low_min": 0.00011733457722584717, "clip_ratio/region_mean": 0.0019685281804413535, "epoch": 0.05030654090031442, "grad_norm": 0.10646910220384598, "learning_rate": 1e-06, "loss": 0.0257, "step": 539 }, { "clip_ratio/high_max": 0.0024534834374208003, "clip_ratio/high_mean": 0.0009934416721080197, "clip_ratio/low_mean": 0.0009415506701770937, "clip_ratio/low_min": 5.170057738723699e-05, "clip_ratio/region_mean": 0.0019349923240952194, "epoch": 0.050399874000315, "grad_norm": 0.10769926756620407, "learning_rate": 1e-06, "loss": 0.0162, "step": 540 }, { "clip_ratio/high_max": 0.002602410997496918, "clip_ratio/high_mean": 0.0010775359332910739, "clip_ratio/low_mean": 0.0010410390732431551, "clip_ratio/low_min": 2.5515987545077223e-05, "clip_ratio/region_mean": 0.0021185750229051337, "epoch": 0.05049320710031558, "grad_norm": 0.10725007951259613, "learning_rate": 1e-06, "loss": -0.0192, "step": 541 }, { "clip_ratio/high_max": 0.002776624110992998, "clip_ratio/high_mean": 0.0010355851754866308, "clip_ratio/low_mean": 0.0008953355554695008, "clip_ratio/low_min": 4.3283366721880157e-05, "clip_ratio/region_mean": 0.0019309207709738985, "epoch": 0.050586540200316166, "grad_norm": 0.10528475046157837, "learning_rate": 1e-06, "loss": 0.0094, "step": 542 }, { "clip_ratio/high_max": 0.0020246337408025283, "clip_ratio/high_mean": 0.0009088136594073148, "clip_ratio/low_mean": 0.0010445900516060647, "clip_ratio/low_min": 7.107205146894557e-05, "clip_ratio/region_mean": 0.001953403712832369, "epoch": 0.05067987330031675, "grad_norm": 0.10592474788427353, "learning_rate": 1e-06, "loss": 0.0339, "step": 543 }, { "clip_ratio/high_max": 0.003016086768184323, "clip_ratio/high_mean": 0.0011549005066626705, "clip_ratio/low_mean": 0.0009634954649300198, "clip_ratio/low_min": 5.9315631006029435e-05, "clip_ratio/region_mean": 0.0021183959834161215, "epoch": 0.05077320640031733, "grad_norm": 0.155888170003891, "learning_rate": 1e-06, "loss": 0.0132, "step": 544 }, { "clip_ratio/high_max": 0.0025852081816992722, "clip_ratio/high_mean": 0.0010890886405832134, "clip_ratio/low_mean": 0.001064599349774653, "clip_ratio/low_min": 0.0001754275444909581, "clip_ratio/region_mean": 0.002153688015823718, "epoch": 0.050866539500317914, "grad_norm": 0.18943233788013458, "learning_rate": 1e-06, "loss": -0.0119, "step": 545 }, { "clip_ratio/high_max": 0.0027793663684860803, "clip_ratio/high_mean": 0.0010125482913281303, "clip_ratio/low_mean": 0.0009451341775275068, "clip_ratio/low_min": 5.034581045038067e-05, "clip_ratio/region_mean": 0.001957682477950584, "epoch": 0.0509598726003185, "grad_norm": 0.1030670553445816, "learning_rate": 1e-06, "loss": -0.0069, "step": 546 }, { "clip_ratio/high_max": 0.002209324185969308, "clip_ratio/high_mean": 0.0008844848616718082, "clip_ratio/low_mean": 0.0009243311214959249, "clip_ratio/low_min": 0.00013581581879407167, "clip_ratio/region_mean": 0.0018088159995386377, "epoch": 0.051053205700319085, "grad_norm": 0.11897844821214676, "learning_rate": 1e-06, "loss": 0.0555, "step": 547 }, { "clip_ratio/high_max": 0.0026205383910564706, "clip_ratio/high_mean": 0.001054772234056145, "clip_ratio/low_mean": 0.0009358568895549979, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00199062919273274, "epoch": 0.05114653880031966, "grad_norm": 0.10565313696861267, "learning_rate": 1e-06, "loss": 0.0193, "step": 548 }, { "clip_ratio/high_max": 0.0026611354187480174, "clip_ratio/high_mean": 0.0010324603535991628, "clip_ratio/low_mean": 0.0011964449749939376, "clip_ratio/low_min": 1.6225338185904548e-05, "clip_ratio/region_mean": 0.002228905344964005, "epoch": 0.05123987190032025, "grad_norm": 0.15604837238788605, "learning_rate": 1e-06, "loss": 0.0236, "step": 549 }, { "clip_ratio/high_max": 0.0024267802509712055, "clip_ratio/high_mean": 0.0009575654075888451, "clip_ratio/low_mean": 0.001118169617257081, "clip_ratio/low_min": 9.856144424702507e-05, "clip_ratio/region_mean": 0.0020757350139319897, "epoch": 0.05133320500032083, "grad_norm": 0.11913684010505676, "learning_rate": 1e-06, "loss": 0.0519, "step": 550 }, { "clip_ratio/high_max": 0.002002275021368405, "clip_ratio/high_mean": 0.0009510376985417679, "clip_ratio/low_mean": 0.0011226929418626241, "clip_ratio/low_min": 0.00013906175900046946, "clip_ratio/region_mean": 0.0020737306476803496, "epoch": 0.05142653810032142, "grad_norm": 0.10895012319087982, "learning_rate": 1e-06, "loss": 0.0018, "step": 551 }, { "clip_ratio/high_max": 0.002054436034086393, "clip_ratio/high_mean": 0.0009376745620102156, "clip_ratio/low_mean": 0.0008807534522929927, "clip_ratio/low_min": 5.2354688705236185e-05, "clip_ratio/region_mean": 0.0018184279761044309, "epoch": 0.051519871200322, "grad_norm": 0.09573733061552048, "learning_rate": 1e-06, "loss": 0.0026, "step": 552 }, { "clip_ratio/high_max": 0.002352635572606232, "clip_ratio/high_mean": 0.0009439997666049749, "clip_ratio/low_mean": 0.0012090514628653182, "clip_ratio/low_min": 9.540954215481179e-05, "clip_ratio/region_mean": 0.0021530512094614096, "epoch": 0.05161320430032258, "grad_norm": 0.10818588733673096, "learning_rate": 1e-06, "loss": 0.0319, "step": 553 }, { "clip_ratio/high_max": 0.0024579532255302183, "clip_ratio/high_mean": 0.0009594352577551035, "clip_ratio/low_mean": 0.001280119740840746, "clip_ratio/low_min": 0.00018664671642909525, "clip_ratio/region_mean": 0.0022395550622604787, "epoch": 0.05170653740032317, "grad_norm": 0.10303737968206406, "learning_rate": 1e-06, "loss": 0.0301, "step": 554 }, { "clip_ratio/high_max": 0.002039117993263062, "clip_ratio/high_mean": 0.0009296319258282892, "clip_ratio/low_mean": 0.0011010477355739567, "clip_ratio/low_min": 3.5991939512314275e-05, "clip_ratio/region_mean": 0.0020306796723161824, "epoch": 0.05179987050032375, "grad_norm": 0.11468939483165741, "learning_rate": 1e-06, "loss": 0.0401, "step": 555 }, { "clip_ratio/high_max": 0.0022879065145389177, "clip_ratio/high_mean": 0.0011766945845010923, "clip_ratio/low_mean": 0.0010166709653276484, "clip_ratio/low_min": 1.5038498531794176e-05, "clip_ratio/region_mean": 0.002193365537095815, "epoch": 0.05189320360032433, "grad_norm": 0.11673793941736221, "learning_rate": 1e-06, "loss": -0.0213, "step": 556 }, { "clip_ratio/high_max": 0.0020410397555679083, "clip_ratio/high_mean": 0.0009163688300759532, "clip_ratio/low_mean": 0.0012360066211840604, "clip_ratio/low_min": 0.0001504603392277204, "clip_ratio/region_mean": 0.002152375498553738, "epoch": 0.051986536700324916, "grad_norm": 0.12710396945476532, "learning_rate": 1e-06, "loss": 0.0465, "step": 557 }, { "clip_ratio/high_max": 0.0019881465050275438, "clip_ratio/high_mean": 0.0009186605966533534, "clip_ratio/low_mean": 0.0011601347614487167, "clip_ratio/low_min": 7.704248127993196e-05, "clip_ratio/region_mean": 0.0020787953180843033, "epoch": 0.0520798698003255, "grad_norm": 0.11551938205957413, "learning_rate": 1e-06, "loss": 0.0158, "step": 558 }, { "clip_ratio/high_max": 0.0026306369472877122, "clip_ratio/high_mean": 0.0010184266757278237, "clip_ratio/low_mean": 0.0011486162911751308, "clip_ratio/low_min": 0.00010442700386192882, "clip_ratio/region_mean": 0.002167042955989018, "epoch": 0.05217320290032608, "grad_norm": 0.11591550707817078, "learning_rate": 1e-06, "loss": -0.0262, "step": 559 }, { "clip_ratio/high_max": 0.0017481404975114856, "clip_ratio/high_mean": 0.0007484360558009939, "clip_ratio/low_mean": 0.001068901543476386, "clip_ratio/low_min": 6.654752178292256e-05, "clip_ratio/region_mean": 0.0018173376010963693, "epoch": 0.052266536000326665, "grad_norm": 0.10007625818252563, "learning_rate": 1e-06, "loss": 0.0402, "step": 560 }, { "clip_ratio/high_max": 0.002342764440982137, "clip_ratio/high_mean": 0.0009339088519482175, "clip_ratio/low_mean": 0.0011359662930772174, "clip_ratio/low_min": 8.061372591328109e-05, "clip_ratio/region_mean": 0.002069875132292509, "epoch": 0.05235986910032725, "grad_norm": 0.1115155890583992, "learning_rate": 1e-06, "loss": 0.0258, "step": 561 }, { "clip_ratio/high_max": 0.0020411468722159043, "clip_ratio/high_mean": 0.0008373003420274472, "clip_ratio/low_mean": 0.0010375615347584244, "clip_ratio/low_min": 7.315738275792683e-05, "clip_ratio/region_mean": 0.0018748619040707126, "epoch": 0.052453202200327835, "grad_norm": 0.10713379085063934, "learning_rate": 1e-06, "loss": 0.0213, "step": 562 }, { "clip_ratio/high_max": 0.00212736550020054, "clip_ratio/high_mean": 0.00101913820617483, "clip_ratio/low_mean": 0.0012943415604240727, "clip_ratio/low_min": 0.0002359375503147021, "clip_ratio/region_mean": 0.0023134798102546483, "epoch": 0.05254653530032841, "grad_norm": 0.12580713629722595, "learning_rate": 1e-06, "loss": 0.0124, "step": 563 }, { "clip_ratio/high_max": 0.0020882370918116067, "clip_ratio/high_mean": 0.0008644727167848032, "clip_ratio/low_mean": 0.001227024280524347, "clip_ratio/low_min": 0.00023539673202321865, "clip_ratio/region_mean": 0.0020914969936711714, "epoch": 0.052639868400329, "grad_norm": 0.10341587662696838, "learning_rate": 1e-06, "loss": 0.0437, "step": 564 }, { "clip_ratio/high_max": 0.002121015335433185, "clip_ratio/high_mean": 0.0008249619422713295, "clip_ratio/low_mean": 0.0011256293910264503, "clip_ratio/low_min": 0.00013979219147586264, "clip_ratio/region_mean": 0.0019505913078319281, "epoch": 0.052733201500329584, "grad_norm": 0.11013689637184143, "learning_rate": 1e-06, "loss": 0.0295, "step": 565 }, { "clip_ratio/high_max": 0.0020267815634724684, "clip_ratio/high_mean": 0.0007923935818325845, "clip_ratio/low_mean": 0.0012331218567851465, "clip_ratio/low_min": 0.00010452612241351744, "clip_ratio/region_mean": 0.002025515503191855, "epoch": 0.05282653460033017, "grad_norm": 0.11691796034574509, "learning_rate": 1e-06, "loss": 0.0838, "step": 566 }, { "clip_ratio/high_max": 0.0020507840235950425, "clip_ratio/high_mean": 0.0008618296560598537, "clip_ratio/low_mean": 0.001163931574410526, "clip_ratio/low_min": 8.01679425421753e-05, "clip_ratio/region_mean": 0.002025761190452613, "epoch": 0.05291986770033075, "grad_norm": 0.122233085334301, "learning_rate": 1e-06, "loss": 0.0378, "step": 567 }, { "clip_ratio/high_max": 0.002239193483546842, "clip_ratio/high_mean": 0.0010256996247335337, "clip_ratio/low_mean": 0.000930793705265387, "clip_ratio/low_min": 3.524883049976779e-05, "clip_ratio/region_mean": 0.001956493390025571, "epoch": 0.05301320080033133, "grad_norm": 0.20606251060962677, "learning_rate": 1e-06, "loss": -0.0168, "step": 568 }, { "clip_ratio/high_max": 0.002508563680748921, "clip_ratio/high_mean": 0.001143041499744868, "clip_ratio/low_mean": 0.001249512877620873, "clip_ratio/low_min": 0.0001102345504477853, "clip_ratio/region_mean": 0.002392554364632815, "epoch": 0.05310653390033192, "grad_norm": 0.178523451089859, "learning_rate": 1e-06, "loss": 0.0048, "step": 569 }, { "clip_ratio/high_max": 0.002086374668579083, "clip_ratio/high_mean": 0.0008527624759153696, "clip_ratio/low_mean": 0.0010658229275577469, "clip_ratio/low_min": 4.572267789626494e-05, "clip_ratio/region_mean": 0.0019185853816452436, "epoch": 0.053199867000332496, "grad_norm": 0.11680780351161957, "learning_rate": 1e-06, "loss": 0.0422, "step": 570 }, { "clip_ratio/high_max": 0.0019599174775066786, "clip_ratio/high_mean": 0.0008517913120158482, "clip_ratio/low_mean": 0.001141517590440344, "clip_ratio/low_min": 0.0001032355175993871, "clip_ratio/region_mean": 0.0019933089133701287, "epoch": 0.05329320010033308, "grad_norm": 0.11743775010108948, "learning_rate": 1e-06, "loss": 0.0336, "step": 571 }, { "clip_ratio/high_max": 0.002138647156243678, "clip_ratio/high_mean": 0.0009568691821186803, "clip_ratio/low_mean": 0.0009216963899234543, "clip_ratio/low_min": 9.939492974808672e-05, "clip_ratio/region_mean": 0.0018785655774991028, "epoch": 0.053386533200333666, "grad_norm": 0.11797573417425156, "learning_rate": 1e-06, "loss": 0.0052, "step": 572 }, { "clip_ratio/high_max": 0.002300238855241332, "clip_ratio/high_mean": 0.0009358835504826857, "clip_ratio/low_mean": 0.001090615618522861, "clip_ratio/low_min": 8.214059016609099e-05, "clip_ratio/region_mean": 0.0020264991908334196, "epoch": 0.05347986630033425, "grad_norm": 0.11401925981044769, "learning_rate": 1e-06, "loss": 0.052, "step": 573 }, { "clip_ratio/high_max": 0.00239048810544773, "clip_ratio/high_mean": 0.0009859963784037973, "clip_ratio/low_mean": 0.0009539177826809464, "clip_ratio/low_min": 0.0001569271735206712, "clip_ratio/region_mean": 0.0019399141383473761, "epoch": 0.05357319940033483, "grad_norm": 0.10502737015485764, "learning_rate": 1e-06, "loss": 0.0416, "step": 574 }, { "clip_ratio/high_max": 0.002273719474032987, "clip_ratio/high_mean": 0.0009481723755015992, "clip_ratio/low_mean": 0.0010330390650779009, "clip_ratio/low_min": 5.5017604609020054e-05, "clip_ratio/region_mean": 0.0019812115060631186, "epoch": 0.053666532500335415, "grad_norm": 0.10575493425130844, "learning_rate": 1e-06, "loss": -0.0016, "step": 575 }, { "clip_ratio/high_max": 0.0022129845892777666, "clip_ratio/high_mean": 0.0008744621991354506, "clip_ratio/low_mean": 0.0009994291212933604, "clip_ratio/low_min": 0.0001251541543751955, "clip_ratio/region_mean": 0.0018738912986009382, "epoch": 0.053759865600336, "grad_norm": 0.11958342790603638, "learning_rate": 1e-06, "loss": 0.0547, "step": 576 }, { "clip_ratio/high_max": 0.0022965703246882185, "clip_ratio/high_mean": 0.0009411160499439575, "clip_ratio/low_mean": 0.0010088508806802565, "clip_ratio/low_min": 7.881337842263747e-05, "clip_ratio/region_mean": 0.0019499669870128855, "epoch": 0.053853198700336585, "grad_norm": 0.09806469082832336, "learning_rate": 1e-06, "loss": 0.0414, "step": 577 }, { "clip_ratio/high_max": 0.0024612118941149674, "clip_ratio/high_mean": 0.0009899283722916152, "clip_ratio/low_mean": 0.0008869870707712835, "clip_ratio/low_min": 4.3028127947764006e-05, "clip_ratio/region_mean": 0.0018769154012261424, "epoch": 0.053946531800337164, "grad_norm": 0.17731398344039917, "learning_rate": 1e-06, "loss": -0.0012, "step": 578 }, { "clip_ratio/high_max": 0.002557455918577034, "clip_ratio/high_mean": 0.0010909680931945331, "clip_ratio/low_mean": 0.0019788719655480236, "clip_ratio/low_min": 7.460481720045209e-05, "clip_ratio/region_mean": 0.0030698400296387263, "epoch": 0.05403986490033775, "grad_norm": 0.12621797621250153, "learning_rate": 1e-06, "loss": -0.0047, "step": 579 }, { "clip_ratio/high_max": 0.002250625784654403, "clip_ratio/high_mean": 0.0009864630592346657, "clip_ratio/low_mean": 0.0008735170958971139, "clip_ratio/low_min": 7.316630035347771e-05, "clip_ratio/region_mean": 0.0018599801769596525, "epoch": 0.054133198000338334, "grad_norm": 0.10964135825634003, "learning_rate": 1e-06, "loss": -0.0006, "step": 580 }, { "clip_ratio/high_max": 0.0025588975331629626, "clip_ratio/high_mean": 0.0010408836969872937, "clip_ratio/low_mean": 0.000953475282585714, "clip_ratio/low_min": 0.00012291524853935698, "clip_ratio/region_mean": 0.0019943590014008805, "epoch": 0.05422653110033892, "grad_norm": 0.10963660478591919, "learning_rate": 1e-06, "loss": -0.0003, "step": 581 }, { "clip_ratio/high_max": 0.0020415589242475107, "clip_ratio/high_mean": 0.0009287152479373617, "clip_ratio/low_mean": 0.001003074358777667, "clip_ratio/low_min": 8.005333256733138e-05, "clip_ratio/region_mean": 0.0019317895894346293, "epoch": 0.0543198642003395, "grad_norm": 0.11718995869159698, "learning_rate": 1e-06, "loss": -0.0077, "step": 582 }, { "clip_ratio/high_max": 0.002175417546823155, "clip_ratio/high_mean": 0.0008287317687063478, "clip_ratio/low_mean": 0.001077540175174363, "clip_ratio/low_min": 8.137037821143167e-05, "clip_ratio/region_mean": 0.0019062719511566684, "epoch": 0.05441319730034008, "grad_norm": 0.129413440823555, "learning_rate": 1e-06, "loss": 0.0643, "step": 583 }, { "clip_ratio/high_max": 0.0024820500548230484, "clip_ratio/high_mean": 0.001035144417983247, "clip_ratio/low_mean": 0.0010529138853598852, "clip_ratio/low_min": 0.0001305453188251704, "clip_ratio/region_mean": 0.0020880583324469626, "epoch": 0.05450653040034067, "grad_norm": 31.309160232543945, "learning_rate": 1e-06, "loss": 0.0015, "step": 584 }, { "clip_ratio/high_max": 0.002341946015803842, "clip_ratio/high_mean": 0.0010095967209053924, "clip_ratio/low_mean": 0.0011283074018137995, "clip_ratio/low_min": 6.25489883532282e-05, "clip_ratio/region_mean": 0.0021379041063482873, "epoch": 0.054599863500341246, "grad_norm": 0.12081718444824219, "learning_rate": 1e-06, "loss": 0.0597, "step": 585 }, { "clip_ratio/high_max": 0.0021600510081043467, "clip_ratio/high_mean": 0.0008407682380493497, "clip_ratio/low_mean": 0.001240364301338559, "clip_ratio/low_min": 7.582831312902272e-05, "clip_ratio/region_mean": 0.0020811325157410465, "epoch": 0.05469319660034183, "grad_norm": 0.11999684572219849, "learning_rate": 1e-06, "loss": 0.0354, "step": 586 }, { "clip_ratio/high_max": 0.0020459194674913306, "clip_ratio/high_mean": 0.0007620115393365268, "clip_ratio/low_mean": 0.001060848338966025, "clip_ratio/low_min": 5.7424944316153415e-05, "clip_ratio/region_mean": 0.0018228598855785094, "epoch": 0.054786529700342416, "grad_norm": 0.11038127541542053, "learning_rate": 1e-06, "loss": 0.0467, "step": 587 }, { "clip_ratio/high_max": 0.0022186188616615254, "clip_ratio/high_mean": 0.0009010556677822024, "clip_ratio/low_mean": 0.0011774348458857276, "clip_ratio/low_min": 6.137622676760657e-05, "clip_ratio/region_mean": 0.0020784905209438875, "epoch": 0.054879862800343, "grad_norm": 0.10276598483324051, "learning_rate": 1e-06, "loss": 0.0256, "step": 588 }, { "clip_ratio/high_max": 0.0023173702938947827, "clip_ratio/high_mean": 0.0010650505046214676, "clip_ratio/low_mean": 0.0009969648326659808, "clip_ratio/low_min": 0.00018824595281330403, "clip_ratio/region_mean": 0.0020620153518393636, "epoch": 0.05497319590034358, "grad_norm": 0.10746926069259644, "learning_rate": 1e-06, "loss": -0.0238, "step": 589 }, { "clip_ratio/high_max": 0.0024066834012046456, "clip_ratio/high_mean": 0.0010481681747478433, "clip_ratio/low_mean": 0.0009437980788788991, "clip_ratio/low_min": 0.00013782995938527165, "clip_ratio/region_mean": 0.0019919662809115835, "epoch": 0.055066529000344165, "grad_norm": 0.10680969059467316, "learning_rate": 1e-06, "loss": -0.0009, "step": 590 }, { "clip_ratio/high_max": 0.002390223104157485, "clip_ratio/high_mean": 0.0009545165357849328, "clip_ratio/low_mean": 0.0011933237256016582, "clip_ratio/low_min": 0.00015012832500360673, "clip_ratio/region_mean": 0.0021478402995853685, "epoch": 0.05515986210034475, "grad_norm": 0.11171848326921463, "learning_rate": 1e-06, "loss": 0.0268, "step": 591 }, { "clip_ratio/high_max": 0.0018758612241072115, "clip_ratio/high_mean": 0.0008407090481341584, "clip_ratio/low_mean": 0.0010596802785585169, "clip_ratio/low_min": 6.277364354900783e-05, "clip_ratio/region_mean": 0.0019003893830813468, "epoch": 0.055253195200345336, "grad_norm": 0.11056873947381973, "learning_rate": 1e-06, "loss": 0.0344, "step": 592 }, { "clip_ratio/high_max": 0.0024359398375963792, "clip_ratio/high_mean": 0.0009761374622030417, "clip_ratio/low_mean": 0.0009749716155056376, "clip_ratio/low_min": 0.00014220718639990082, "clip_ratio/region_mean": 0.0019511091231834143, "epoch": 0.055346528300345914, "grad_norm": 0.10610097646713257, "learning_rate": 1e-06, "loss": 0.0155, "step": 593 }, { "clip_ratio/high_max": 0.002388142966083251, "clip_ratio/high_mean": 0.0010742774702521274, "clip_ratio/low_mean": 0.0009646805610827869, "clip_ratio/low_min": 9.010510439111385e-05, "clip_ratio/region_mean": 0.002038958038610872, "epoch": 0.0554398614003465, "grad_norm": 0.10572066158056259, "learning_rate": 1e-06, "loss": 0.0122, "step": 594 }, { "clip_ratio/high_max": 0.0021172760461922735, "clip_ratio/high_mean": 0.0010117343263118528, "clip_ratio/low_mean": 0.0010011274061980657, "clip_ratio/low_min": 0.00012805586356989807, "clip_ratio/region_mean": 0.0020128617106820457, "epoch": 0.055533194500347084, "grad_norm": 0.11294758319854736, "learning_rate": 1e-06, "loss": 0.0337, "step": 595 }, { "clip_ratio/high_max": 0.0022302978395600803, "clip_ratio/high_mean": 0.0010535142428125255, "clip_ratio/low_mean": 0.0010577405337244272, "clip_ratio/low_min": 4.503955278778449e-05, "clip_ratio/region_mean": 0.00211125475470908, "epoch": 0.05562652760034766, "grad_norm": 0.12416896224021912, "learning_rate": 1e-06, "loss": 0.0122, "step": 596 }, { "clip_ratio/high_max": 0.0021628181129926816, "clip_ratio/high_mean": 0.0010035060440714005, "clip_ratio/low_mean": 0.0011121814659418305, "clip_ratio/low_min": 0.0002266084684379166, "clip_ratio/region_mean": 0.002115687522746157, "epoch": 0.05571986070034825, "grad_norm": 0.12608219683170319, "learning_rate": 1e-06, "loss": 0.0215, "step": 597 }, { "clip_ratio/high_max": 0.0023476473143091425, "clip_ratio/high_mean": 0.0009626134633435868, "clip_ratio/low_mean": 0.0008621300139566301, "clip_ratio/low_min": 8.547347533749416e-05, "clip_ratio/region_mean": 0.0018247434200020507, "epoch": 0.05581319380034883, "grad_norm": 0.10490106046199799, "learning_rate": 1e-06, "loss": 0.0613, "step": 598 }, { "clip_ratio/high_max": 0.002324442299141083, "clip_ratio/high_mean": 0.0009290950983995572, "clip_ratio/low_mean": 0.0010374554749432718, "clip_ratio/low_min": 0.00010561935414443724, "clip_ratio/region_mean": 0.001966550560609903, "epoch": 0.05590652690034942, "grad_norm": 0.11225977540016174, "learning_rate": 1e-06, "loss": 0.0384, "step": 599 }, { "clip_ratio/high_max": 0.002352143004827667, "clip_ratio/high_mean": 0.0009724052797537297, "clip_ratio/low_mean": 0.0010755634866654873, "clip_ratio/low_min": 9.959659655578434e-05, "clip_ratio/region_mean": 0.0020479687736951746, "epoch": 0.055999860000349996, "grad_norm": 0.11923374235630035, "learning_rate": 1e-06, "loss": 0.0169, "step": 600 }, { "clip_ratio/high_max": 0.0023188820923678577, "clip_ratio/high_mean": 0.0009774119080248056, "clip_ratio/low_mean": 0.0008481391596433241, "clip_ratio/low_min": 5.699911525880452e-05, "clip_ratio/region_mean": 0.0018255510658491403, "epoch": 0.05609319310035058, "grad_norm": 0.10098270326852798, "learning_rate": 1e-06, "loss": 0.0019, "step": 601 }, { "clip_ratio/high_max": 0.002604272187454626, "clip_ratio/high_mean": 0.0009815658449952025, "clip_ratio/low_mean": 0.001053436581059941, "clip_ratio/low_min": 5.977434830128914e-05, "clip_ratio/region_mean": 0.0020350023842183873, "epoch": 0.05618652620035117, "grad_norm": 0.11951037496328354, "learning_rate": 1e-06, "loss": 0.0077, "step": 602 }, { "clip_ratio/high_max": 0.002145596910850145, "clip_ratio/high_mean": 0.0009673955046309857, "clip_ratio/low_mean": 0.001266419636522187, "clip_ratio/low_min": 8.729849287192337e-05, "clip_ratio/region_mean": 0.002233815161162056, "epoch": 0.05627985930035175, "grad_norm": 0.11715636402368546, "learning_rate": 1e-06, "loss": 0.0457, "step": 603 }, { "clip_ratio/high_max": 0.0022288630607363302, "clip_ratio/high_mean": 0.0008411181770497933, "clip_ratio/low_mean": 0.0012082733919669408, "clip_ratio/low_min": 1.3177314031054266e-05, "clip_ratio/region_mean": 0.0020493915799306706, "epoch": 0.05637319240035233, "grad_norm": 0.30399492383003235, "learning_rate": 1e-06, "loss": 0.0631, "step": 604 }, { "clip_ratio/high_max": 0.0025918883038684726, "clip_ratio/high_mean": 0.001042553278239211, "clip_ratio/low_mean": 0.0012045267303619767, "clip_ratio/low_min": 0.00013828648116032127, "clip_ratio/region_mean": 0.002247080068627838, "epoch": 0.056466525500352915, "grad_norm": 0.11313539743423462, "learning_rate": 1e-06, "loss": 0.032, "step": 605 }, { "clip_ratio/high_max": 0.002171586485928856, "clip_ratio/high_mean": 0.000912574239919195, "clip_ratio/low_mean": 0.0012378535466268659, "clip_ratio/low_min": 0.00014586246834369376, "clip_ratio/region_mean": 0.0021504278120119125, "epoch": 0.0565598586003535, "grad_norm": 0.1191086545586586, "learning_rate": 1e-06, "loss": 0.0186, "step": 606 }, { "clip_ratio/high_max": 0.0028559632191900164, "clip_ratio/high_mean": 0.0011514084944792558, "clip_ratio/low_mean": 0.0013359503755054902, "clip_ratio/low_min": 9.57002084760461e-05, "clip_ratio/region_mean": 0.0024873588990885764, "epoch": 0.056653191700354086, "grad_norm": 0.13263286650180817, "learning_rate": 1e-06, "loss": 0.0372, "step": 607 }, { "clip_ratio/high_max": 0.002273844998853747, "clip_ratio/high_mean": 0.0010449366964166984, "clip_ratio/low_mean": 0.0011178272470715456, "clip_ratio/low_min": 0.00014016128807270434, "clip_ratio/region_mean": 0.002162763979868032, "epoch": 0.056746524800354664, "grad_norm": 0.10531739890575409, "learning_rate": 1e-06, "loss": 0.0329, "step": 608 }, { "clip_ratio/high_max": 0.002481375660863705, "clip_ratio/high_mean": 0.0010798064031405374, "clip_ratio/low_mean": 0.0009624852191336686, "clip_ratio/low_min": 5.298283213051036e-05, "clip_ratio/region_mean": 0.0020422916059033014, "completions/clipped_ratio": 0.014107840401785698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 614.7670288085938, "completions/mean_terminated_length": 564.9515991210938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.05683985790035525, "grad_norm": 0.13478153944015503, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 489065502.0, "reward": 0.5779855251312256, "reward_std": 0.19758881628513336, "rewards/simpleverify_reward/mean": 0.5779854655265808, "rewards/simpleverify_reward/std": 0.4938828945159912, "step": 609 }, { "clip_ratio/high_max": 0.0018826923187589273, "clip_ratio/high_mean": 0.000883283328221296, "clip_ratio/low_mean": 0.0009435781030333601, "clip_ratio/low_min": 8.98214466360514e-05, "clip_ratio/region_mean": 0.001826861458539497, "epoch": 0.056933191000355834, "grad_norm": 1.8454992771148682, "learning_rate": 1e-06, "loss": 0.0656, "step": 610 }, { "clip_ratio/high_max": 0.002433124180242885, "clip_ratio/high_mean": 0.0011184483428223757, "clip_ratio/low_mean": 0.0010766275809146464, "clip_ratio/low_min": 7.665696466574445e-05, "clip_ratio/region_mean": 0.0021950759037281387, "epoch": 0.05702652410035641, "grad_norm": 0.1422373205423355, "learning_rate": 1e-06, "loss": 0.0329, "step": 611 }, { "clip_ratio/high_max": 0.0026827713300008327, "clip_ratio/high_mean": 0.0011337460673530586, "clip_ratio/low_mean": 0.001051625289619551, "clip_ratio/low_min": 0.0001889002614916535, "clip_ratio/region_mean": 0.0021853713478776626, "epoch": 0.057119857200357, "grad_norm": 0.13265828788280487, "learning_rate": 1e-06, "loss": 0.007, "step": 612 }, { "clip_ratio/high_max": 0.002417869749478996, "clip_ratio/high_mean": 0.0009905324168357765, "clip_ratio/low_mean": 0.0011561478459043428, "clip_ratio/low_min": 0.00017516242223791778, "clip_ratio/region_mean": 0.002146680242731236, "epoch": 0.05721319030035758, "grad_norm": 0.12621721625328064, "learning_rate": 1e-06, "loss": 0.0461, "step": 613 }, { "clip_ratio/high_max": 0.0021351930918172, "clip_ratio/high_mean": 0.0009299481207563076, "clip_ratio/low_mean": 0.0011066958632000023, "clip_ratio/low_min": 9.874462193693034e-05, "clip_ratio/region_mean": 0.0020366439493955113, "epoch": 0.05730652340035817, "grad_norm": 14072937472.0, "learning_rate": 1e-06, "loss": 6914351.0, "step": 614 }, { "clip_ratio/high_max": 0.0024725770417717285, "clip_ratio/high_mean": 0.001070097780029755, "clip_ratio/low_mean": 0.0009615337785362499, "clip_ratio/low_min": 6.97443974786438e-05, "clip_ratio/region_mean": 0.002031631549471058, "epoch": 0.05739985650035875, "grad_norm": 1.0688698291778564, "learning_rate": 1e-06, "loss": 0.0509, "step": 615 }, { "clip_ratio/high_max": 0.0027717277116607875, "clip_ratio/high_mean": 0.0010635005201038439, "clip_ratio/low_mean": 0.001033225857099751, "clip_ratio/low_min": 8.539021473552566e-05, "clip_ratio/region_mean": 0.0020967263699276373, "epoch": 0.05749318960035933, "grad_norm": 0.10949176549911499, "learning_rate": 1e-06, "loss": 0.0277, "step": 616 }, { "clip_ratio/high_max": 0.002559550375735853, "clip_ratio/high_mean": 0.001045377061018371, "clip_ratio/low_mean": 0.001116386421927018, "clip_ratio/low_min": 0.00013004834636376472, "clip_ratio/region_mean": 0.0021617634847643785, "epoch": 0.05758652270035992, "grad_norm": 0.3339146077632904, "learning_rate": 1e-06, "loss": 0.021, "step": 617 }, { "clip_ratio/high_max": 0.0023403273662552238, "clip_ratio/high_mean": 0.0009199930627801223, "clip_ratio/low_mean": 0.001144464811659418, "clip_ratio/low_min": 0.00016603298718109727, "clip_ratio/region_mean": 0.002064457890810445, "epoch": 0.0576798558003605, "grad_norm": 249700.59375, "learning_rate": 1e-06, "loss": 62.5008, "step": 618 }, { "clip_ratio/high_max": 0.0024026289393077604, "clip_ratio/high_mean": 0.001161467815109063, "clip_ratio/low_mean": 0.001040013969031861, "clip_ratio/low_min": 7.728922901151236e-05, "clip_ratio/region_mean": 0.0022014817586750723, "epoch": 0.05777318890036108, "grad_norm": 0.12954115867614746, "learning_rate": 1e-06, "loss": 0.0157, "step": 619 }, { "clip_ratio/high_max": 0.0024663466901984066, "clip_ratio/high_mean": 0.0010262158739351435, "clip_ratio/low_mean": 0.0011104269688075874, "clip_ratio/low_min": 2.800179208861664e-05, "clip_ratio/region_mean": 0.002136642920959275, "epoch": 0.057866522000361666, "grad_norm": 0.1635865569114685, "learning_rate": 1e-06, "loss": 0.0413, "step": 620 }, { "clip_ratio/high_max": 0.0023338408282143064, "clip_ratio/high_mean": 0.0009335654831374995, "clip_ratio/low_mean": 0.0011179965804331005, "clip_ratio/low_min": 0.00015746902499813586, "clip_ratio/region_mean": 0.0020515621145023033, "epoch": 0.05795985510036225, "grad_norm": 0.11043478548526764, "learning_rate": 1e-06, "loss": 0.0522, "step": 621 }, { "clip_ratio/high_max": 0.002195848341216333, "clip_ratio/high_mean": 0.0008944363162299851, "clip_ratio/low_mean": 0.0011744457733584568, "clip_ratio/low_min": 6.835143176431302e-05, "clip_ratio/region_mean": 0.002068882131425198, "epoch": 0.05805318820036283, "grad_norm": 0.1274164766073227, "learning_rate": 1e-06, "loss": 0.04, "step": 622 }, { "clip_ratio/high_max": 0.002555703977122903, "clip_ratio/high_mean": 0.0009996252265409566, "clip_ratio/low_mean": 0.0011054466704081278, "clip_ratio/low_min": 7.226069556054426e-05, "clip_ratio/region_mean": 0.0021050718569313176, "epoch": 0.058146521300363414, "grad_norm": 0.12116770446300507, "learning_rate": 1e-06, "loss": 0.0344, "step": 623 }, { "clip_ratio/high_max": 0.0023710469213256147, "clip_ratio/high_mean": 0.0009325961764261592, "clip_ratio/low_mean": 0.0009719665431475732, "clip_ratio/low_min": 2.7580582354858052e-05, "clip_ratio/region_mean": 0.0019045626831939444, "epoch": 0.058239854400364, "grad_norm": 965.5730590820312, "learning_rate": 1e-06, "loss": 0.4894, "step": 624 }, { "clip_ratio/high_max": 0.0024476631078869104, "clip_ratio/high_mean": 0.0009908318879752187, "clip_ratio/low_mean": 0.0012181521378806792, "clip_ratio/low_min": 2.84762872979627e-05, "clip_ratio/region_mean": 0.002208983998571057, "epoch": 0.058333187500364585, "grad_norm": 0.1634451001882553, "learning_rate": 1e-06, "loss": 0.0331, "step": 625 }, { "clip_ratio/high_max": 0.002235976582596777, "clip_ratio/high_mean": 0.0009083714066946413, "clip_ratio/low_mean": 0.0011043392005376518, "clip_ratio/low_min": 7.413418097712565e-05, "clip_ratio/region_mean": 0.0020127105963183567, "epoch": 0.05842652060036516, "grad_norm": 0.12290123850107193, "learning_rate": 1e-06, "loss": 0.0217, "step": 626 }, { "clip_ratio/high_max": 0.0025451778201386333, "clip_ratio/high_mean": 0.001139744788815733, "clip_ratio/low_mean": 0.0013215845538070425, "clip_ratio/low_min": 6.971872699068626e-05, "clip_ratio/region_mean": 0.002461329349898733, "epoch": 0.05851985370036575, "grad_norm": 0.12009480595588684, "learning_rate": 1e-06, "loss": 0.0309, "step": 627 }, { "clip_ratio/high_max": 0.002643365107360296, "clip_ratio/high_mean": 0.0011768949079851154, "clip_ratio/low_mean": 0.0011976478272117674, "clip_ratio/low_min": 0.0001431542623322457, "clip_ratio/region_mean": 0.0023745427970425226, "epoch": 0.05861318680036633, "grad_norm": 0.11753562092781067, "learning_rate": 1e-06, "loss": 0.0276, "step": 628 }, { "clip_ratio/high_max": 0.002438943429297069, "clip_ratio/high_mean": 0.001046201165081584, "clip_ratio/low_mean": 0.0013472400642058346, "clip_ratio/low_min": 0.00020198828860884532, "clip_ratio/region_mean": 0.002393441172898747, "epoch": 0.05870651990036692, "grad_norm": 0.11168830096721649, "learning_rate": 1e-06, "loss": 0.0703, "step": 629 }, { "clip_ratio/high_max": 0.002333088868908817, "clip_ratio/high_mean": 0.0009067199443961727, "clip_ratio/low_mean": 0.0017064188577933237, "clip_ratio/low_min": 0.00022318211449601222, "clip_ratio/region_mean": 0.0026131387567147613, "epoch": 0.0587998530003675, "grad_norm": 0.11786709725856781, "learning_rate": 1e-06, "loss": 0.0916, "step": 630 }, { "clip_ratio/high_max": 0.0027130329035571776, "clip_ratio/high_mean": 0.0010847731264220783, "clip_ratio/low_mean": 0.0014855799272481818, "clip_ratio/low_min": 0.00016107022383948788, "clip_ratio/region_mean": 0.002570353026385419, "epoch": 0.05889318610036808, "grad_norm": 10.57763385772705, "learning_rate": 1e-06, "loss": 0.0595, "step": 631 }, { "clip_ratio/high_max": 0.002469524544721935, "clip_ratio/high_mean": 0.000999443742330186, "clip_ratio/low_mean": 0.001408760135745979, "clip_ratio/low_min": 0.0001192190138681326, "clip_ratio/region_mean": 0.002408203828963451, "epoch": 0.05898651920036867, "grad_norm": 0.355061799287796, "learning_rate": 1e-06, "loss": 0.0343, "step": 632 }, { "clip_ratio/high_max": 0.002734995221544523, "clip_ratio/high_mean": 0.0011616006140684476, "clip_ratio/low_mean": 0.0014469487032329198, "clip_ratio/low_min": 5.343414613889763e-05, "clip_ratio/region_mean": 0.00260854927910259, "epoch": 0.05907985230036925, "grad_norm": 0.10703109949827194, "learning_rate": 1e-06, "loss": 0.0147, "step": 633 }, { "clip_ratio/high_max": 0.002768713493424002, "clip_ratio/high_mean": 0.0010938397063000593, "clip_ratio/low_mean": 0.0011693555825331714, "clip_ratio/low_min": 2.7276426408207044e-05, "clip_ratio/region_mean": 0.0022631952888332307, "epoch": 0.05917318540036983, "grad_norm": 0.0950351357460022, "learning_rate": 1e-06, "loss": -0.0122, "step": 634 }, { "clip_ratio/high_max": 0.002465018398652319, "clip_ratio/high_mean": 0.0009798237042559776, "clip_ratio/low_mean": 0.0016322332103300141, "clip_ratio/low_min": 0.00019665213130792836, "clip_ratio/region_mean": 0.002612056880025193, "epoch": 0.059266518500370416, "grad_norm": 0.11052601039409637, "learning_rate": 1e-06, "loss": 0.0763, "step": 635 }, { "clip_ratio/high_max": 0.002533671118726488, "clip_ratio/high_mean": 0.0009781113803910557, "clip_ratio/low_mean": 0.0014456789394898806, "clip_ratio/low_min": 0.00011923054444196168, "clip_ratio/region_mean": 0.002423790378088597, "epoch": 0.059359851600371, "grad_norm": 0.10943303257226944, "learning_rate": 1e-06, "loss": 0.0262, "step": 636 }, { "clip_ratio/high_max": 0.0023464998630515765, "clip_ratio/high_mean": 0.001087652106434689, "clip_ratio/low_mean": 0.0014745159605809022, "clip_ratio/low_min": 0.00018325439941691002, "clip_ratio/region_mean": 0.002562168097938411, "epoch": 0.05945318470037158, "grad_norm": 0.12654410302639008, "learning_rate": 1e-06, "loss": -0.0059, "step": 637 }, { "clip_ratio/high_max": 0.0025381618324900046, "clip_ratio/high_mean": 0.0011521074775373563, "clip_ratio/low_mean": 0.0013021217382629402, "clip_ratio/low_min": 0.00011212957633688347, "clip_ratio/region_mean": 0.0024542291939724237, "epoch": 0.059546517800372165, "grad_norm": 0.11000388860702515, "learning_rate": 1e-06, "loss": 0.0312, "step": 638 }, { "clip_ratio/high_max": 0.002776867288048379, "clip_ratio/high_mean": 0.0011716094959410839, "clip_ratio/low_mean": 0.0011528494014783064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023244589538080618, "epoch": 0.05963985090037275, "grad_norm": 0.10943210124969482, "learning_rate": 1e-06, "loss": -0.005, "step": 639 }, { "clip_ratio/high_max": 0.002140525441063801, "clip_ratio/high_mean": 0.0008681018589413725, "clip_ratio/low_mean": 0.0016497030919708777, "clip_ratio/low_min": 0.00013927290819992777, "clip_ratio/region_mean": 0.0025178049836540595, "epoch": 0.059733184000373335, "grad_norm": 1.599152684211731, "learning_rate": 1e-06, "loss": 0.0822, "step": 640 }, { "clip_ratio/high_max": 0.002320529361895751, "clip_ratio/high_mean": 0.0009316174982814118, "clip_ratio/low_mean": 0.0014806601939199027, "clip_ratio/low_min": 0.0001431610789950355, "clip_ratio/region_mean": 0.0024122776594595052, "epoch": 0.05982651710037391, "grad_norm": 0.36795562505722046, "learning_rate": 1e-06, "loss": 0.0498, "step": 641 }, { "clip_ratio/high_max": 0.002545662267948501, "clip_ratio/high_mean": 0.001078245128155686, "clip_ratio/low_mean": 0.0014442984793276992, "clip_ratio/low_min": 5.239991514827125e-05, "clip_ratio/region_mean": 0.002522543611121364, "epoch": 0.0599198502003745, "grad_norm": 0.11597011983394623, "learning_rate": 1e-06, "loss": 0.0277, "step": 642 }, { "clip_ratio/high_max": 0.002377368466113694, "clip_ratio/high_mean": 0.0010435167168907356, "clip_ratio/low_mean": 0.0015064447361510247, "clip_ratio/low_min": 9.258969839720521e-05, "clip_ratio/region_mean": 0.0025499614712316543, "epoch": 0.060013183300375084, "grad_norm": 0.12030437588691711, "learning_rate": 1e-06, "loss": 0.0271, "step": 643 }, { "clip_ratio/high_max": 0.0022764059831388295, "clip_ratio/high_mean": 0.0009969972343242262, "clip_ratio/low_mean": 0.0014344241571961902, "clip_ratio/low_min": 0.00012481987869250588, "clip_ratio/region_mean": 0.002431421358778607, "epoch": 0.06010651640037567, "grad_norm": 0.11691172420978546, "learning_rate": 1e-06, "loss": 0.0108, "step": 644 }, { "clip_ratio/high_max": 0.002200591094151605, "clip_ratio/high_mean": 0.001027626805807813, "clip_ratio/low_mean": 0.0014785796774958726, "clip_ratio/low_min": 0.0001361388131044805, "clip_ratio/region_mean": 0.0025062064742087387, "epoch": 0.06019984950037625, "grad_norm": 0.6515962481498718, "learning_rate": 1e-06, "loss": 0.1354, "step": 645 }, { "clip_ratio/high_max": 0.002287144372530747, "clip_ratio/high_mean": 0.001025917266815668, "clip_ratio/low_mean": 0.0013302338411449455, "clip_ratio/low_min": 0.00010401302370155463, "clip_ratio/region_mean": 0.002356151097046677, "epoch": 0.06029318260037683, "grad_norm": 19.04352378845215, "learning_rate": 1e-06, "loss": 0.0194, "step": 646 }, { "clip_ratio/high_max": 0.0025620360247557983, "clip_ratio/high_mean": 0.000973580787103856, "clip_ratio/low_mean": 0.0013649256143253297, "clip_ratio/low_min": 0.00015349126442743, "clip_ratio/region_mean": 0.0023385063905152492, "epoch": 0.06038651570037742, "grad_norm": 0.11697337031364441, "learning_rate": 1e-06, "loss": -0.0043, "step": 647 }, { "clip_ratio/high_max": 0.002432079843856627, "clip_ratio/high_mean": 0.0009917522274918156, "clip_ratio/low_mean": 0.001269569795113057, "clip_ratio/low_min": 0.00010833987926162081, "clip_ratio/region_mean": 0.0022613220353377983, "epoch": 0.060479848800377996, "grad_norm": 0.10863753408193588, "learning_rate": 1e-06, "loss": 0.0093, "step": 648 }, { "clip_ratio/high_max": 0.0022621683310717344, "clip_ratio/high_mean": 0.0009445001178391976, "clip_ratio/low_mean": 0.0013273297627165448, "clip_ratio/low_min": 7.740234832454007e-05, "clip_ratio/region_mean": 0.0022718298860127106, "epoch": 0.06057318190037858, "grad_norm": 0.10702554881572723, "learning_rate": 1e-06, "loss": 0.0217, "step": 649 }, { "clip_ratio/high_max": 0.0024523667743778788, "clip_ratio/high_mean": 0.00097852002363652, "clip_ratio/low_mean": 0.001233047998539405, "clip_ratio/low_min": 0.00010808253045979654, "clip_ratio/region_mean": 0.0022115680621936917, "epoch": 0.060666515000379166, "grad_norm": 0.12258566915988922, "learning_rate": 1e-06, "loss": -0.0035, "step": 650 }, { "clip_ratio/high_max": 0.0020648834470193833, "clip_ratio/high_mean": 0.0008412961196881952, "clip_ratio/low_mean": 0.0012769153181579895, "clip_ratio/low_min": 6.171621680550743e-05, "clip_ratio/region_mean": 0.002118211457855068, "epoch": 0.06075984810037975, "grad_norm": 0.10569457709789276, "learning_rate": 1e-06, "loss": 0.0262, "step": 651 }, { "clip_ratio/high_max": 0.0027495974500197917, "clip_ratio/high_mean": 0.0011145794778713025, "clip_ratio/low_mean": 0.0011676070498651825, "clip_ratio/low_min": 0.00014539828634951846, "clip_ratio/region_mean": 0.002282186505908612, "epoch": 0.06085318120038033, "grad_norm": 0.1090521365404129, "learning_rate": 1e-06, "loss": 0.0134, "step": 652 }, { "clip_ratio/high_max": 0.0025869550445349887, "clip_ratio/high_mean": 0.0010744609462562948, "clip_ratio/low_mean": 0.0011594561765377875, "clip_ratio/low_min": 2.653700630617095e-05, "clip_ratio/region_mean": 0.0022339171409839764, "epoch": 0.060946514300380915, "grad_norm": 0.13510650396347046, "learning_rate": 1e-06, "loss": 0.0044, "step": 653 }, { "clip_ratio/high_max": 0.0027787536528194323, "clip_ratio/high_mean": 0.001104193739593029, "clip_ratio/low_mean": 0.0014708929556945805, "clip_ratio/low_min": 0.00015398228970298078, "clip_ratio/region_mean": 0.002575086684373673, "epoch": 0.0610398474003815, "grad_norm": 0.11401999741792679, "learning_rate": 1e-06, "loss": 0.0126, "step": 654 }, { "clip_ratio/high_max": 0.0026963075579260476, "clip_ratio/high_mean": 0.0010910008750215638, "clip_ratio/low_mean": 0.0015076146701176185, "clip_ratio/low_min": 8.807423819234828e-05, "clip_ratio/region_mean": 0.0025986155596910976, "epoch": 0.061133180500382085, "grad_norm": 0.15589646995067596, "learning_rate": 1e-06, "loss": 0.039, "step": 655 }, { "clip_ratio/high_max": 0.0026853707313421182, "clip_ratio/high_mean": 0.001006870956189232, "clip_ratio/low_mean": 0.0015459465284948237, "clip_ratio/low_min": 0.00025763245503185317, "clip_ratio/region_mean": 0.0025528174737701192, "epoch": 0.061226513600382663, "grad_norm": 0.12471504509449005, "learning_rate": 1e-06, "loss": 0.0374, "step": 656 }, { "clip_ratio/high_max": 0.0024121908318193164, "clip_ratio/high_mean": 0.001032410938933026, "clip_ratio/low_mean": 0.0013212125522841234, "clip_ratio/low_min": 0.00010638949515850982, "clip_ratio/region_mean": 0.0023536234948551282, "epoch": 0.06131984670038325, "grad_norm": 0.11951066553592682, "learning_rate": 1e-06, "loss": 0.0248, "step": 657 }, { "clip_ratio/high_max": 0.002063850646663923, "clip_ratio/high_mean": 0.0009129626741923857, "clip_ratio/low_mean": 0.0012775774303008802, "clip_ratio/low_min": 4.5842156396247447e-05, "clip_ratio/region_mean": 0.002190540100855287, "epoch": 0.061413179800383834, "grad_norm": 0.11594922095537186, "learning_rate": 1e-06, "loss": 0.0586, "step": 658 }, { "clip_ratio/high_max": 0.002625801251269877, "clip_ratio/high_mean": 0.0011583187624637503, "clip_ratio/low_mean": 0.0013048889086348936, "clip_ratio/low_min": 8.299439923575846e-05, "clip_ratio/region_mean": 0.002463207631080877, "epoch": 0.06150651290038442, "grad_norm": 0.11306975781917572, "learning_rate": 1e-06, "loss": -0.0371, "step": 659 }, { "clip_ratio/high_max": 0.0023783933866070583, "clip_ratio/high_mean": 0.0011153668237966485, "clip_ratio/low_mean": 0.0009566960943629965, "clip_ratio/low_min": 2.5938990802387707e-05, "clip_ratio/region_mean": 0.002072062918159645, "epoch": 0.061599846000385, "grad_norm": 0.11043727397918701, "learning_rate": 1e-06, "loss": -0.0405, "step": 660 }, { "clip_ratio/high_max": 0.0024551346941734664, "clip_ratio/high_mean": 0.0010959892078972189, "clip_ratio/low_mean": 0.001192376970720943, "clip_ratio/low_min": 3.619626249928842e-05, "clip_ratio/region_mean": 0.002288366187713109, "epoch": 0.06169317910038558, "grad_norm": 0.1374572366476059, "learning_rate": 1e-06, "loss": 0.039, "step": 661 }, { "clip_ratio/high_max": 0.0023530258840764873, "clip_ratio/high_mean": 0.0010477811010787264, "clip_ratio/low_mean": 0.0013215098697401118, "clip_ratio/low_min": 9.351839253213257e-05, "clip_ratio/region_mean": 0.002369290858041495, "epoch": 0.06178651220038617, "grad_norm": 0.13867244124412537, "learning_rate": 1e-06, "loss": -0.0017, "step": 662 }, { "clip_ratio/high_max": 0.002232126076705754, "clip_ratio/high_mean": 0.0008987615583464503, "clip_ratio/low_mean": 0.0011110207069577882, "clip_ratio/low_min": 4.9956074690271635e-05, "clip_ratio/region_mean": 0.002009782263485249, "epoch": 0.061879845300386746, "grad_norm": 0.10422000288963318, "learning_rate": 1e-06, "loss": -0.0049, "step": 663 }, { "clip_ratio/high_max": 0.002710353903239593, "clip_ratio/high_mean": 0.0012015982829325367, "clip_ratio/low_mean": 0.0012092820616089739, "clip_ratio/low_min": 0.00010489207670616452, "clip_ratio/region_mean": 0.002410880333627574, "epoch": 0.06197317840038733, "grad_norm": 4148.28466796875, "learning_rate": 1e-06, "loss": 6.3788, "step": 664 }, { "clip_ratio/high_max": 0.002445989379339153, "clip_ratio/high_mean": 0.0009419266734767007, "clip_ratio/low_mean": 0.0010934119709418155, "clip_ratio/low_min": 4.1172873352479655e-05, "clip_ratio/region_mean": 0.0020353386498754844, "epoch": 0.062066511500387916, "grad_norm": 0.17105872929096222, "learning_rate": 1e-06, "loss": 0.0138, "step": 665 }, { "clip_ratio/high_max": 0.0019694851071108133, "clip_ratio/high_mean": 0.0008912036519177491, "clip_ratio/low_mean": 0.001200806837005075, "clip_ratio/low_min": 0.0002483644893800374, "clip_ratio/region_mean": 0.002092010479827877, "epoch": 0.0621598446003885, "grad_norm": 0.10640674829483032, "learning_rate": 1e-06, "loss": 0.0192, "step": 666 }, { "clip_ratio/high_max": 0.002233234976301901, "clip_ratio/high_mean": 0.0009870616231637541, "clip_ratio/low_mean": 0.0012969460985914338, "clip_ratio/low_min": 0.00015328968584071845, "clip_ratio/region_mean": 0.002284007736307103, "epoch": 0.06225317770038908, "grad_norm": 0.10902467370033264, "learning_rate": 1e-06, "loss": 0.0233, "step": 667 }, { "clip_ratio/high_max": 0.002011421100178268, "clip_ratio/high_mean": 0.0009281812126573641, "clip_ratio/low_mean": 0.001393393617036054, "clip_ratio/low_min": 0.00019431123837421183, "clip_ratio/region_mean": 0.0023215748005895875, "epoch": 0.062346510800389665, "grad_norm": 0.17937719821929932, "learning_rate": 1e-06, "loss": 0.0588, "step": 668 }, { "clip_ratio/high_max": 0.0020904075463477056, "clip_ratio/high_mean": 0.0008997024615382543, "clip_ratio/low_mean": 0.0013801790446450468, "clip_ratio/low_min": 0.0001435560807294678, "clip_ratio/region_mean": 0.0022798815261921845, "epoch": 0.06243984390039025, "grad_norm": 0.10588958114385605, "learning_rate": 1e-06, "loss": -0.0151, "step": 669 }, { "clip_ratio/high_max": 0.002417557443550322, "clip_ratio/high_mean": 0.0010329084743716521, "clip_ratio/low_mean": 0.001336307374003809, "clip_ratio/low_min": 6.038274568709312e-05, "clip_ratio/region_mean": 0.002369215857470408, "epoch": 0.06253317700039084, "grad_norm": 0.1187441274523735, "learning_rate": 1e-06, "loss": 0.0352, "step": 670 }, { "clip_ratio/high_max": 0.002480857896443922, "clip_ratio/high_mean": 0.0010278425834258087, "clip_ratio/low_mean": 0.0013508358097169548, "clip_ratio/low_min": 0.00010568636116659036, "clip_ratio/region_mean": 0.002378678422246594, "epoch": 0.06262651010039141, "grad_norm": 0.11163929104804993, "learning_rate": 1e-06, "loss": -0.0164, "step": 671 }, { "clip_ratio/high_max": 0.0022365770164469723, "clip_ratio/high_mean": 0.0009602849804650759, "clip_ratio/low_mean": 0.001330734790826682, "clip_ratio/low_min": 0.00015553867615381023, "clip_ratio/region_mean": 0.0022910197731107473, "epoch": 0.062719843200392, "grad_norm": 0.2668677568435669, "learning_rate": 1e-06, "loss": 0.0067, "step": 672 }, { "clip_ratio/high_max": 0.0023627673945156857, "clip_ratio/high_mean": 0.0009864389485301217, "clip_ratio/low_mean": 0.001440965237634373, "clip_ratio/low_min": 0.00013962466800876427, "clip_ratio/region_mean": 0.0024274041716125794, "epoch": 0.06281317630039258, "grad_norm": 0.1117556095123291, "learning_rate": 1e-06, "loss": 0.0018, "step": 673 }, { "clip_ratio/high_max": 0.0019086802494712174, "clip_ratio/high_mean": 0.0008545816053810995, "clip_ratio/low_mean": 0.0013530310534406453, "clip_ratio/low_min": 9.74384220171487e-05, "clip_ratio/region_mean": 0.0022076127061154693, "epoch": 0.06290650940039316, "grad_norm": 0.11400160193443298, "learning_rate": 1e-06, "loss": 0.0296, "step": 674 }, { "clip_ratio/high_max": 0.0018440688945702277, "clip_ratio/high_mean": 0.0007894118371041259, "clip_ratio/low_mean": 0.0013532640623452608, "clip_ratio/low_min": 0.00010083082270284649, "clip_ratio/region_mean": 0.002142675861250609, "epoch": 0.06299984250039375, "grad_norm": 0.11896184831857681, "learning_rate": 1e-06, "loss": 0.0582, "step": 675 }, { "clip_ratio/high_max": 0.0025069195035030134, "clip_ratio/high_mean": 0.0011351410903444048, "clip_ratio/low_mean": 0.0013034091534791514, "clip_ratio/low_min": 5.0169653150078375e-05, "clip_ratio/region_mean": 0.0024385501965298317, "epoch": 0.06309317560039433, "grad_norm": 0.11076658219099045, "learning_rate": 1e-06, "loss": 0.0071, "step": 676 }, { "clip_ratio/high_max": 0.0021391708869487047, "clip_ratio/high_mean": 0.0009580245223332895, "clip_ratio/low_mean": 0.001430384028935805, "clip_ratio/low_min": 0.00010103542626893613, "clip_ratio/region_mean": 0.0023884086185717024, "epoch": 0.06318650870039491, "grad_norm": 0.10459767282009125, "learning_rate": 1e-06, "loss": 0.0624, "step": 677 }, { "clip_ratio/high_max": 0.002137682429747656, "clip_ratio/high_mean": 0.00099198569660075, "clip_ratio/low_mean": 0.0013740914582740515, "clip_ratio/low_min": 0.0002392204860370839, "clip_ratio/region_mean": 0.002366077133046929, "epoch": 0.0632798418003955, "grad_norm": 0.1039733737707138, "learning_rate": 1e-06, "loss": 0.0197, "step": 678 }, { "clip_ratio/high_max": 0.002055105462204665, "clip_ratio/high_mean": 0.0009679204322310397, "clip_ratio/low_mean": 0.001264243363038986, "clip_ratio/low_min": 0.00016018713631638093, "clip_ratio/region_mean": 0.0022321637807181105, "epoch": 0.06337317490039608, "grad_norm": 0.10956466197967529, "learning_rate": 1e-06, "loss": 0.0123, "step": 679 }, { "clip_ratio/high_max": 0.002592860924778506, "clip_ratio/high_mean": 0.0011743278046196792, "clip_ratio/low_mean": 0.0011391170937713468, "clip_ratio/low_min": 0.0001533035674583516, "clip_ratio/region_mean": 0.0023134449002100155, "epoch": 0.06346650800039666, "grad_norm": 0.10671522468328476, "learning_rate": 1e-06, "loss": -0.0452, "step": 680 }, { "clip_ratio/high_max": 0.0022551204820047133, "clip_ratio/high_mean": 0.0010086747442983324, "clip_ratio/low_mean": 0.0013218061249062885, "clip_ratio/low_min": 1.367315690004034e-05, "clip_ratio/region_mean": 0.0023304809074033983, "epoch": 0.06355984110039725, "grad_norm": 0.11453437060117722, "learning_rate": 1e-06, "loss": -0.0076, "step": 681 }, { "clip_ratio/high_max": 0.0022475221412605606, "clip_ratio/high_mean": 0.0009552631890983321, "clip_ratio/low_mean": 0.0015905241853033658, "clip_ratio/low_min": 0.00023384841188089922, "clip_ratio/region_mean": 0.0025457874580752105, "epoch": 0.06365317420039783, "grad_norm": 0.13428255915641785, "learning_rate": 1e-06, "loss": 0.0406, "step": 682 }, { "clip_ratio/high_max": 0.0021352813455450814, "clip_ratio/high_mean": 0.0009092930313272518, "clip_ratio/low_mean": 0.0012580564216477796, "clip_ratio/low_min": 4.360519415058661e-05, "clip_ratio/region_mean": 0.002167349426599685, "epoch": 0.06374650730039842, "grad_norm": 2.868584632873535, "learning_rate": 1e-06, "loss": 0.0301, "step": 683 }, { "clip_ratio/high_max": 0.002089362908009207, "clip_ratio/high_mean": 0.0009201261000271188, "clip_ratio/low_mean": 0.0013270883246150333, "clip_ratio/low_min": 0.00030350187989824917, "clip_ratio/region_mean": 0.002247214477392845, "epoch": 0.063839840400399, "grad_norm": 0.11900093406438828, "learning_rate": 1e-06, "loss": 0.0252, "step": 684 }, { "clip_ratio/high_max": 0.002147254053852521, "clip_ratio/high_mean": 0.0009190749860863434, "clip_ratio/low_mean": 0.0013434622051136103, "clip_ratio/low_min": 6.048674049452529e-05, "clip_ratio/region_mean": 0.002262537193018943, "epoch": 0.06393317350039958, "grad_norm": 0.1181182786822319, "learning_rate": 1e-06, "loss": 0.0459, "step": 685 }, { "clip_ratio/high_max": 0.002067133267701138, "clip_ratio/high_mean": 0.0008905928480089642, "clip_ratio/low_mean": 0.0011316949103274965, "clip_ratio/low_min": 0.00010732088048825972, "clip_ratio/region_mean": 0.0020222877428750508, "epoch": 0.06402650660040017, "grad_norm": 0.10931537300348282, "learning_rate": 1e-06, "loss": 0.0269, "step": 686 }, { "clip_ratio/high_max": 0.0027406108856666833, "clip_ratio/high_mean": 0.0011706712411978515, "clip_ratio/low_mean": 0.0011570697497518267, "clip_ratio/low_min": 0.00016425488502136432, "clip_ratio/region_mean": 0.0023277410145965405, "epoch": 0.06411983970040075, "grad_norm": 0.12323232740163803, "learning_rate": 1e-06, "loss": 0.008, "step": 687 }, { "clip_ratio/high_max": 0.0027424366053310223, "clip_ratio/high_mean": 0.001030556961268303, "clip_ratio/low_mean": 0.00158773594921513, "clip_ratio/low_min": 0.00019340653125254903, "clip_ratio/region_mean": 0.002618292892293539, "epoch": 0.06421317280040133, "grad_norm": 0.5412478446960449, "learning_rate": 1e-06, "loss": 0.0399, "step": 688 }, { "clip_ratio/high_max": 0.0023235082771861926, "clip_ratio/high_mean": 0.0008790420288278256, "clip_ratio/low_mean": 0.0013361312667257152, "clip_ratio/low_min": 0.00013549268351198407, "clip_ratio/region_mean": 0.0022151732773636468, "epoch": 0.06430650590040192, "grad_norm": 0.10890794545412064, "learning_rate": 1e-06, "loss": 0.0363, "step": 689 }, { "clip_ratio/high_max": 0.0023400458667310886, "clip_ratio/high_mean": 0.0009628291845729109, "clip_ratio/low_mean": 0.0014204945291567128, "clip_ratio/low_min": 0.0002308327939317678, "clip_ratio/region_mean": 0.0023833237501094118, "epoch": 0.0643998390004025, "grad_norm": 0.12933948636054993, "learning_rate": 1e-06, "loss": 0.0632, "step": 690 }, { "clip_ratio/high_max": 0.002740357434959151, "clip_ratio/high_mean": 0.001158433486125432, "clip_ratio/low_mean": 0.0011139394482597709, "clip_ratio/low_min": 2.5783829187275842e-05, "clip_ratio/region_mean": 0.0022723729634890333, "epoch": 0.06449317210040308, "grad_norm": 0.1131989061832428, "learning_rate": 1e-06, "loss": -0.0069, "step": 691 }, { "clip_ratio/high_max": 0.002331867472094018, "clip_ratio/high_mean": 0.0009702574625407578, "clip_ratio/low_mean": 0.0011739674919226673, "clip_ratio/low_min": 7.384502441709628e-05, "clip_ratio/region_mean": 0.0021442248762468807, "epoch": 0.06458650520040367, "grad_norm": 0.1338718831539154, "learning_rate": 1e-06, "loss": 0.0563, "step": 692 }, { "clip_ratio/high_max": 0.0023515865177614614, "clip_ratio/high_mean": 0.0009812846983550116, "clip_ratio/low_mean": 0.0010844938769878354, "clip_ratio/low_min": 8.952659572969424e-05, "clip_ratio/region_mean": 0.002065778578980826, "epoch": 0.06467983830040425, "grad_norm": 0.11498841643333435, "learning_rate": 1e-06, "loss": 0.0272, "step": 693 }, { "clip_ratio/high_max": 0.002324499888345599, "clip_ratio/high_mean": 0.000953960415245092, "clip_ratio/low_mean": 0.001281638706132071, "clip_ratio/low_min": 0.00023566906929772813, "clip_ratio/region_mean": 0.002235599131381605, "epoch": 0.06477317140040484, "grad_norm": 0.11788497865200043, "learning_rate": 1e-06, "loss": 0.0648, "step": 694 }, { "clip_ratio/high_max": 0.0026258151920046657, "clip_ratio/high_mean": 0.001108573282181169, "clip_ratio/low_mean": 0.0009595559749868698, "clip_ratio/low_min": 3.55531410605181e-05, "clip_ratio/region_mean": 0.002068129244435113, "epoch": 0.06486650450040542, "grad_norm": 0.11140355467796326, "learning_rate": 1e-06, "loss": 0.0112, "step": 695 }, { "clip_ratio/high_max": 0.002771210318314843, "clip_ratio/high_mean": 0.001031831772706937, "clip_ratio/low_mean": 0.0012863392475992441, "clip_ratio/low_min": 0.00016965200666163582, "clip_ratio/region_mean": 0.0023181710566859692, "epoch": 0.064959837600406, "grad_norm": 0.10203178226947784, "learning_rate": 1e-06, "loss": 0.0257, "step": 696 }, { "clip_ratio/high_max": 0.0021398632088676095, "clip_ratio/high_mean": 0.000935660777031444, "clip_ratio/low_mean": 0.0011616647734626895, "clip_ratio/low_min": 6.220911200216506e-05, "clip_ratio/region_mean": 0.00209732553048525, "epoch": 0.06505317070040659, "grad_norm": 0.10208957642316818, "learning_rate": 1e-06, "loss": 0.0257, "step": 697 }, { "clip_ratio/high_max": 0.002198674472310813, "clip_ratio/high_mean": 0.0009424757809028961, "clip_ratio/low_mean": 0.001290460455493303, "clip_ratio/low_min": 6.455591483245371e-05, "clip_ratio/region_mean": 0.002232936218206305, "epoch": 0.06514650380040717, "grad_norm": 0.11768817156553268, "learning_rate": 1e-06, "loss": 0.0358, "step": 698 }, { "clip_ratio/high_max": 0.0024097610876196995, "clip_ratio/high_mean": 0.0010552777366683586, "clip_ratio/low_mean": 0.001397711894242093, "clip_ratio/low_min": 0.00017318117534159683, "clip_ratio/region_mean": 0.0024529896400053985, "epoch": 0.06523983690040774, "grad_norm": 0.13768014311790466, "learning_rate": 1e-06, "loss": 0.0468, "step": 699 }, { "clip_ratio/high_max": 0.002438643597997725, "clip_ratio/high_mean": 0.0011547299109224696, "clip_ratio/low_mean": 0.0011869404897879576, "clip_ratio/low_min": 0.0001390645466017304, "clip_ratio/region_mean": 0.002341670318855904, "epoch": 0.06533317000040834, "grad_norm": 0.11751200258731842, "learning_rate": 1e-06, "loss": -0.0038, "step": 700 }, { "clip_ratio/high_max": 0.0028118964364693966, "clip_ratio/high_mean": 0.0011020062356692506, "clip_ratio/low_mean": 0.001395051409417647, "clip_ratio/low_min": 7.224008004413918e-05, "clip_ratio/region_mean": 0.002497057597793173, "epoch": 0.06542650310040891, "grad_norm": 0.12057358771562576, "learning_rate": 1e-06, "loss": 0.011, "step": 701 }, { "clip_ratio/high_max": 0.0026519623934291303, "clip_ratio/high_mean": 0.0009300831898144679, "clip_ratio/low_mean": 0.0012893246057501528, "clip_ratio/low_min": 0.00011676877511490602, "clip_ratio/region_mean": 0.002219407819211483, "epoch": 0.06551983620040949, "grad_norm": 0.11774525791406631, "learning_rate": 1e-06, "loss": 0.0497, "step": 702 }, { "clip_ratio/high_max": 0.0021846843264938798, "clip_ratio/high_mean": 0.0010203428064414766, "clip_ratio/low_mean": 0.0010067184284707764, "clip_ratio/low_min": 8.176434130291454e-05, "clip_ratio/region_mean": 0.0020270612367312424, "epoch": 0.06561316930041008, "grad_norm": 0.11034981906414032, "learning_rate": 1e-06, "loss": 0.0131, "step": 703 }, { "clip_ratio/high_max": 0.0022549977802555077, "clip_ratio/high_mean": 0.0009660979085310828, "clip_ratio/low_mean": 0.0011627740113908658, "clip_ratio/low_min": 6.548420788021758e-05, "clip_ratio/region_mean": 0.0021288719508447684, "epoch": 0.06570650240041066, "grad_norm": 0.10844568908214569, "learning_rate": 1e-06, "loss": -0.0068, "step": 704 }, { "clip_ratio/high_max": 0.002234734653029591, "clip_ratio/high_mean": 0.0009234660992660793, "clip_ratio/low_mean": 0.001220959147758549, "clip_ratio/low_min": 7.287728294613771e-05, "clip_ratio/region_mean": 0.0021444251615321264, "epoch": 0.06579983550041126, "grad_norm": 0.11419650912284851, "learning_rate": 1e-06, "loss": 0.018, "step": 705 }, { "clip_ratio/high_max": 0.0027125859996885993, "clip_ratio/high_mean": 0.0012363481691863853, "clip_ratio/low_mean": 0.0011968626245106861, "clip_ratio/low_min": 4.00187709601596e-05, "clip_ratio/region_mean": 0.002433210793242324, "epoch": 0.06589316860041183, "grad_norm": 0.12124373763799667, "learning_rate": 1e-06, "loss": -0.0231, "step": 706 }, { "clip_ratio/high_max": 0.0021565806127910037, "clip_ratio/high_mean": 0.0008688764110047487, "clip_ratio/low_mean": 0.001272557052288903, "clip_ratio/low_min": 0.00012981864165340085, "clip_ratio/region_mean": 0.0021414334478322417, "epoch": 0.06598650170041241, "grad_norm": 0.11262965947389603, "learning_rate": 1e-06, "loss": 0.0402, "step": 707 }, { "clip_ratio/high_max": 0.002467541169608012, "clip_ratio/high_mean": 0.0010572205173957627, "clip_ratio/low_mean": 0.0014089090182096697, "clip_ratio/low_min": 0.00035368261524126865, "clip_ratio/region_mean": 0.002466129466483835, "epoch": 0.066079834800413, "grad_norm": 0.12729088962078094, "learning_rate": 1e-06, "loss": 0.0054, "step": 708 }, { "clip_ratio/high_max": 0.002418801268504467, "clip_ratio/high_mean": 0.0009145463500317419, "clip_ratio/low_mean": 0.0013171310783945955, "clip_ratio/low_min": 7.043410187179688e-05, "clip_ratio/region_mean": 0.0022316774193313904, "epoch": 0.06617316790041358, "grad_norm": 0.09995414316654205, "learning_rate": 1e-06, "loss": 0.0095, "step": 709 }, { "clip_ratio/high_max": 0.0025145779582089745, "clip_ratio/high_mean": 0.0010026472591562197, "clip_ratio/low_mean": 0.001356084663711954, "clip_ratio/low_min": 0.0002667441121957381, "clip_ratio/region_mean": 0.0023587318501085974, "epoch": 0.06626650100041416, "grad_norm": 0.11940798908472061, "learning_rate": 1e-06, "loss": 0.0368, "step": 710 }, { "clip_ratio/high_max": 0.0022791301998950075, "clip_ratio/high_mean": 0.0009585223961039446, "clip_ratio/low_mean": 0.0011777116706070956, "clip_ratio/low_min": 0.00013839095026924042, "clip_ratio/region_mean": 0.0021362340266932733, "epoch": 0.06635983410041475, "grad_norm": 151.8549041748047, "learning_rate": 1e-06, "loss": 0.0416, "step": 711 }, { "clip_ratio/high_max": 0.0021692305235774256, "clip_ratio/high_mean": 0.0009993428357120138, "clip_ratio/low_mean": 0.0011989409795205574, "clip_ratio/low_min": 9.363579101773212e-05, "clip_ratio/region_mean": 0.0021982838079566136, "epoch": 0.06645316720041533, "grad_norm": 0.11236035078763962, "learning_rate": 1e-06, "loss": 0.0267, "step": 712 }, { "clip_ratio/high_max": 0.002412203684798442, "clip_ratio/high_mean": 0.0009201784123433754, "clip_ratio/low_mean": 0.0011337073246977525, "clip_ratio/low_min": 6.704450788674876e-05, "clip_ratio/region_mean": 0.002053885742498096, "epoch": 0.06654650030041591, "grad_norm": 0.1132621169090271, "learning_rate": 1e-06, "loss": 0.0213, "step": 713 }, { "clip_ratio/high_max": 0.002183757947932463, "clip_ratio/high_mean": 0.0009011215606733458, "clip_ratio/low_mean": 0.0012254153625690378, "clip_ratio/low_min": 0.0001408164571330417, "clip_ratio/region_mean": 0.002126536906871479, "epoch": 0.0666398334004165, "grad_norm": 0.10990343987941742, "learning_rate": 1e-06, "loss": 0.0238, "step": 714 }, { "clip_ratio/high_max": 0.002301058608281892, "clip_ratio/high_mean": 0.0009983988893509377, "clip_ratio/low_mean": 0.0011379522002243903, "clip_ratio/low_min": 8.526602869096678e-06, "clip_ratio/region_mean": 0.002136351089575328, "epoch": 0.06673316650041708, "grad_norm": 0.13453735411167145, "learning_rate": 1e-06, "loss": 0.0026, "step": 715 }, { "clip_ratio/high_max": 0.0021830594560015015, "clip_ratio/high_mean": 0.0010218853203696199, "clip_ratio/low_mean": 0.0011447137821960496, "clip_ratio/low_min": 6.446682800742565e-05, "clip_ratio/region_mean": 0.0021665990498149768, "epoch": 0.06682649960041767, "grad_norm": 0.11695725470781326, "learning_rate": 1e-06, "loss": -0.0051, "step": 716 }, { "clip_ratio/high_max": 0.002474335560691543, "clip_ratio/high_mean": 0.00103653740734444, "clip_ratio/low_mean": 0.0010922658548224717, "clip_ratio/low_min": 0.0001107346124626929, "clip_ratio/region_mean": 0.0021288032439770177, "epoch": 0.06691983270041825, "grad_norm": 0.11490193009376526, "learning_rate": 1e-06, "loss": -0.0066, "step": 717 }, { "clip_ratio/high_max": 0.002176738973503234, "clip_ratio/high_mean": 0.0009673962731540087, "clip_ratio/low_mean": 0.00108000465479563, "clip_ratio/low_min": 4.1708126445882954e-05, "clip_ratio/region_mean": 0.002047400936135091, "epoch": 0.06701316580041883, "grad_norm": 0.10898280888795853, "learning_rate": 1e-06, "loss": 0.0141, "step": 718 }, { "clip_ratio/high_max": 0.0019998301613668445, "clip_ratio/high_mean": 0.0009073895762412576, "clip_ratio/low_mean": 0.0011026247884728946, "clip_ratio/low_min": 0.00014411894608201692, "clip_ratio/region_mean": 0.002010014337429311, "epoch": 0.06710649890041942, "grad_norm": 0.09870455414056778, "learning_rate": 1e-06, "loss": 0.0148, "step": 719 }, { "clip_ratio/high_max": 0.002582363107649144, "clip_ratio/high_mean": 0.0010400756054878002, "clip_ratio/low_mean": 0.0010733928829722572, "clip_ratio/low_min": 6.478199429693632e-05, "clip_ratio/region_mean": 0.002113468464813195, "epoch": 0.06719983200042, "grad_norm": 0.2356414645910263, "learning_rate": 1e-06, "loss": 0.005, "step": 720 }, { "clip_ratio/high_max": 0.0019777264751610346, "clip_ratio/high_mean": 0.0008256175078713568, "clip_ratio/low_mean": 0.0013880869464628631, "clip_ratio/low_min": 0.00017353309522150084, "clip_ratio/region_mean": 0.002213704472524114, "epoch": 0.06729316510042058, "grad_norm": 0.18862295150756836, "learning_rate": 1e-06, "loss": 0.0628, "step": 721 }, { "clip_ratio/high_max": 0.0029793506109854206, "clip_ratio/high_mean": 0.0012327299737080466, "clip_ratio/low_mean": 0.001211147457070183, "clip_ratio/low_min": 9.931488511938369e-05, "clip_ratio/region_mean": 0.002443877463520039, "epoch": 0.06738649820042117, "grad_norm": 0.16819368302822113, "learning_rate": 1e-06, "loss": -0.0074, "step": 722 }, { "clip_ratio/high_max": 0.0021481366857187822, "clip_ratio/high_mean": 0.0009157162985502509, "clip_ratio/low_mean": 0.001276731598409242, "clip_ratio/low_min": 0.0001271406608793768, "clip_ratio/region_mean": 0.002192447878769599, "epoch": 0.06747983130042175, "grad_norm": 0.12451296299695969, "learning_rate": 1e-06, "loss": 0.0426, "step": 723 }, { "clip_ratio/high_max": 0.0026987026431015693, "clip_ratio/high_mean": 0.0010706787634262582, "clip_ratio/low_mean": 0.0014110391457506921, "clip_ratio/low_min": 0.00010851296519831521, "clip_ratio/region_mean": 0.0024817179510137066, "epoch": 0.06757316440042234, "grad_norm": 0.1136854961514473, "learning_rate": 1e-06, "loss": 0.041, "step": 724 }, { "clip_ratio/high_max": 0.0028791422737413086, "clip_ratio/high_mean": 0.0011597061711654533, "clip_ratio/low_mean": 0.0008778686897130683, "clip_ratio/low_min": 3.154972364427522e-05, "clip_ratio/region_mean": 0.0020375748863443732, "epoch": 0.06766649750042292, "grad_norm": 0.11603917181491852, "learning_rate": 1e-06, "loss": -0.0398, "step": 725 }, { "clip_ratio/high_max": 0.002593861492641736, "clip_ratio/high_mean": 0.001096424508432392, "clip_ratio/low_mean": 0.0013482749855029397, "clip_ratio/low_min": 0.000178154088644078, "clip_ratio/region_mean": 0.0024446995157632045, "epoch": 0.0677598306004235, "grad_norm": 0.09812898188829422, "learning_rate": 1e-06, "loss": -0.0021, "step": 726 }, { "clip_ratio/high_max": 0.0021078989921079483, "clip_ratio/high_mean": 0.0008909482312446926, "clip_ratio/low_mean": 0.001310424635448726, "clip_ratio/low_min": 0.00012902215712529141, "clip_ratio/region_mean": 0.0022013729176251218, "epoch": 0.06785316370042409, "grad_norm": 0.10486070066690445, "learning_rate": 1e-06, "loss": 0.0471, "step": 727 }, { "clip_ratio/high_max": 0.002501400376786478, "clip_ratio/high_mean": 0.0010108197602676228, "clip_ratio/low_mean": 0.0011508404641062953, "clip_ratio/low_min": 4.1113013139693066e-05, "clip_ratio/region_mean": 0.0021616602316498756, "epoch": 0.06794649680042467, "grad_norm": 0.11481024324893951, "learning_rate": 1e-06, "loss": 0.0269, "step": 728 }, { "clip_ratio/high_max": 0.0022550193170900457, "clip_ratio/high_mean": 0.0008807580161374062, "clip_ratio/low_mean": 0.001298228780797217, "clip_ratio/low_min": 0.00019226646054448793, "clip_ratio/region_mean": 0.002178986818762496, "epoch": 0.06803982990042524, "grad_norm": 0.23314028978347778, "learning_rate": 1e-06, "loss": 0.0884, "step": 729 }, { "clip_ratio/high_max": 0.002380192425334826, "clip_ratio/high_mean": 0.0009700238533696393, "clip_ratio/low_mean": 0.0015136128349695355, "clip_ratio/low_min": 0.00022444814658229006, "clip_ratio/region_mean": 0.002483636708348058, "epoch": 0.06813316300042584, "grad_norm": 61.04166793823242, "learning_rate": 1e-06, "loss": 0.0982, "step": 730 }, { "clip_ratio/high_max": 0.002765322686173022, "clip_ratio/high_mean": 0.00109710556716891, "clip_ratio/low_mean": 0.0010807329636008944, "clip_ratio/low_min": 4.8075104132294655e-05, "clip_ratio/region_mean": 0.002177838556235656, "epoch": 0.06822649610042641, "grad_norm": 0.15944159030914307, "learning_rate": 1e-06, "loss": -0.0055, "step": 731 }, { "clip_ratio/high_max": 0.0023373873336822726, "clip_ratio/high_mean": 0.0009826585719565628, "clip_ratio/low_mean": 0.0012317266664467752, "clip_ratio/low_min": 0.0001022003070829669, "clip_ratio/region_mean": 0.00221438523294637, "epoch": 0.06831982920042699, "grad_norm": 0.11558610945940018, "learning_rate": 1e-06, "loss": -0.0066, "step": 732 }, { "clip_ratio/high_max": 0.002130705863237381, "clip_ratio/high_mean": 0.0008369657953153364, "clip_ratio/low_mean": 0.0014816935399721842, "clip_ratio/low_min": 0.0003079702391914907, "clip_ratio/region_mean": 0.0023186594116850756, "epoch": 0.06841316230042759, "grad_norm": 0.11695141345262527, "learning_rate": 1e-06, "loss": 0.0627, "step": 733 }, { "clip_ratio/high_max": 0.002312582942977315, "clip_ratio/high_mean": 0.000989941087027546, "clip_ratio/low_mean": 0.0011645044050965225, "clip_ratio/low_min": 6.361932901199907e-05, "clip_ratio/region_mean": 0.0021544454430113547, "epoch": 0.06850649540042816, "grad_norm": 0.11236712336540222, "learning_rate": 1e-06, "loss": 0.0022, "step": 734 }, { "clip_ratio/high_max": 0.0022859539167257026, "clip_ratio/high_mean": 0.0009047948169609299, "clip_ratio/low_mean": 0.001427615710781538, "clip_ratio/low_min": 0.00017723374730849173, "clip_ratio/region_mean": 0.002332410542294383, "epoch": 0.06859982850042876, "grad_norm": 0.12946714460849762, "learning_rate": 1e-06, "loss": 0.0572, "step": 735 }, { "clip_ratio/high_max": 0.002316066420462448, "clip_ratio/high_mean": 0.0009481475117354421, "clip_ratio/low_mean": 0.001361851931505953, "clip_ratio/low_min": 0.00014957154417061247, "clip_ratio/region_mean": 0.0023099995305528864, "epoch": 0.06869316160042933, "grad_norm": 0.16663967072963715, "learning_rate": 1e-06, "loss": 0.0439, "step": 736 }, { "clip_ratio/high_max": 0.0024259949277620763, "clip_ratio/high_mean": 0.0012105186106055044, "clip_ratio/low_mean": 0.000957082309469115, "clip_ratio/low_min": 2.9418596568575595e-05, "clip_ratio/region_mean": 0.002167600927350577, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 610.1763916015625, "completions/mean_terminated_length": 562.8575439453125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.06878649470042991, "grad_norm": 1901.3165283203125, "learning_rate": 1e-06, "loss": 55.3028, "num_tokens": 570521796.0, "reward": 0.5895037055015564, "reward_std": 0.19454404711723328, "rewards/simpleverify_reward/mean": 0.5895037055015564, "rewards/simpleverify_reward/std": 0.49192607402801514, "step": 737 }, { "clip_ratio/high_max": 0.0025185917620547116, "clip_ratio/high_mean": 0.001024678191242856, "clip_ratio/low_mean": 0.001020813384457142, "clip_ratio/low_min": 0.00011270969935139874, "clip_ratio/region_mean": 0.002045491579337977, "epoch": 0.0688798278004305, "grad_norm": 11995.03515625, "learning_rate": 1e-06, "loss": 13.7257, "step": 738 }, { "clip_ratio/high_max": 0.0022589843574678525, "clip_ratio/high_mean": 0.00094279388031282, "clip_ratio/low_mean": 0.0010056657629320398, "clip_ratio/low_min": 0.00011417547466407996, "clip_ratio/region_mean": 0.001948459648701828, "epoch": 0.06897316090043108, "grad_norm": 458178656.0, "learning_rate": 1e-06, "loss": 291329.625, "step": 739 }, { "clip_ratio/high_max": 0.002194894078456855, "clip_ratio/high_mean": 0.0010231018181912077, "clip_ratio/low_mean": 0.0010900486522587016, "clip_ratio/low_min": 0.00015250716933223885, "clip_ratio/region_mean": 0.0021131505200173706, "epoch": 0.06906649400043166, "grad_norm": 1.656571388244629, "learning_rate": 1e-06, "loss": 0.0397, "step": 740 }, { "clip_ratio/high_max": 0.002340232997084968, "clip_ratio/high_mean": 0.0010958867460431065, "clip_ratio/low_mean": 0.0010355805643484928, "clip_ratio/low_min": 8.592584435973549e-05, "clip_ratio/region_mean": 0.0021314672922017053, "epoch": 0.06915982710043225, "grad_norm": 187562.546875, "learning_rate": 1e-06, "loss": 12.5408, "step": 741 }, { "clip_ratio/high_max": 0.0023647777488804422, "clip_ratio/high_mean": 0.0010473912479938008, "clip_ratio/low_mean": 0.0010617330631248478, "clip_ratio/low_min": 7.926825765025569e-05, "clip_ratio/region_mean": 0.0021091242888360284, "epoch": 0.06925316020043283, "grad_norm": 1575.906005859375, "learning_rate": 1e-06, "loss": 0.2248, "step": 742 }, { "clip_ratio/high_max": 0.002860847242118325, "clip_ratio/high_mean": 0.0010350693119107746, "clip_ratio/low_mean": 0.0012349658136372454, "clip_ratio/low_min": 0.0001598538774487679, "clip_ratio/region_mean": 0.002270035147375893, "epoch": 0.06934649330043341, "grad_norm": 7.796701431274414, "learning_rate": 1e-06, "loss": 0.1096, "step": 743 }, { "clip_ratio/high_max": 0.0026164639020862523, "clip_ratio/high_mean": 0.001119265049965179, "clip_ratio/low_mean": 0.0010607179283397272, "clip_ratio/low_min": 0.00013157120520190801, "clip_ratio/region_mean": 0.0021799830137751997, "epoch": 0.069439826400434, "grad_norm": 0.12173054367303848, "learning_rate": 1e-06, "loss": 0.0333, "step": 744 }, { "clip_ratio/high_max": 0.003021635231561959, "clip_ratio/high_mean": 0.0013215109902375843, "clip_ratio/low_mean": 0.0010101567841047654, "clip_ratio/low_min": 4.712853296950925e-05, "clip_ratio/region_mean": 0.0023316678052651696, "epoch": 0.06953315950043458, "grad_norm": 0.6599025130271912, "learning_rate": 1e-06, "loss": -0.0099, "step": 745 }, { "clip_ratio/high_max": 0.002587840805063024, "clip_ratio/high_mean": 0.0011933927598875016, "clip_ratio/low_mean": 0.001061537159330328, "clip_ratio/low_min": 0.00014597251538361888, "clip_ratio/region_mean": 0.00225492991739884, "epoch": 0.06962649260043517, "grad_norm": 0.12067849189043045, "learning_rate": 1e-06, "loss": -0.0214, "step": 746 }, { "clip_ratio/high_max": 0.0024288952190545388, "clip_ratio/high_mean": 0.0010250623054162133, "clip_ratio/low_mean": 0.0010926186459983, "clip_ratio/low_min": 0.0001026450154313352, "clip_ratio/region_mean": 0.0021176809823373333, "epoch": 0.06971982570043575, "grad_norm": 0.37357601523399353, "learning_rate": 1e-06, "loss": 0.0175, "step": 747 }, { "clip_ratio/high_max": 0.0031707603920949623, "clip_ratio/high_mean": 0.0013268662405607756, "clip_ratio/low_mean": 0.001109620649003773, "clip_ratio/low_min": 9.896720257529523e-05, "clip_ratio/region_mean": 0.002436486887745559, "epoch": 0.06981315880043633, "grad_norm": 0.12083915621042252, "learning_rate": 1e-06, "loss": 0.0015, "step": 748 }, { "clip_ratio/high_max": 0.002716463914111955, "clip_ratio/high_mean": 0.000957385678702849, "clip_ratio/low_mean": 0.001219520898303017, "clip_ratio/low_min": 8.752021221880568e-05, "clip_ratio/region_mean": 0.0021769066152046435, "epoch": 0.06990649190043692, "grad_norm": 0.1293472945690155, "learning_rate": 1e-06, "loss": 0.0929, "step": 749 }, { "clip_ratio/high_max": 0.002743180164543446, "clip_ratio/high_mean": 0.0010686141868063714, "clip_ratio/low_mean": 0.0013260503255878575, "clip_ratio/low_min": 5.803081785415998e-05, "clip_ratio/region_mean": 0.0023946644432726316, "epoch": 0.0699998250004375, "grad_norm": 0.10545019805431366, "learning_rate": 1e-06, "loss": 0.0645, "step": 750 }, { "clip_ratio/high_max": 0.0025358893690281548, "clip_ratio/high_mean": 0.0010852183077076916, "clip_ratio/low_mean": 0.0012669122552324552, "clip_ratio/low_min": 8.182271085388493e-05, "clip_ratio/region_mean": 0.0023521305120084435, "epoch": 0.07009315810043808, "grad_norm": 0.1239662915468216, "learning_rate": 1e-06, "loss": 0.0092, "step": 751 }, { "clip_ratio/high_max": 0.002627882560773287, "clip_ratio/high_mean": 0.001024362791213207, "clip_ratio/low_mean": 0.0012412054584274301, "clip_ratio/low_min": 9.672793930803891e-05, "clip_ratio/region_mean": 0.0022655682187178172, "epoch": 0.07018649120043867, "grad_norm": 0.13950856029987335, "learning_rate": 1e-06, "loss": 0.0126, "step": 752 }, { "clip_ratio/high_max": 0.0029165967243898194, "clip_ratio/high_mean": 0.00111639602255309, "clip_ratio/low_mean": 0.001319195311225485, "clip_ratio/low_min": 0.00010080587981065037, "clip_ratio/region_mean": 0.002435591391986236, "epoch": 0.07027982430043925, "grad_norm": 0.22012284398078918, "learning_rate": 1e-06, "loss": 0.0188, "step": 753 }, { "clip_ratio/high_max": 0.0027905923343496397, "clip_ratio/high_mean": 0.001184568762255367, "clip_ratio/low_mean": 0.0013675775480805896, "clip_ratio/low_min": 9.46670738812827e-05, "clip_ratio/region_mean": 0.0025521463321638294, "epoch": 0.07037315740043983, "grad_norm": 0.2553885281085968, "learning_rate": 1e-06, "loss": 0.0369, "step": 754 }, { "clip_ratio/high_max": 0.002629054506542161, "clip_ratio/high_mean": 0.0011348401567374822, "clip_ratio/low_mean": 0.0014029588528501336, "clip_ratio/low_min": 4.317703132983297e-05, "clip_ratio/region_mean": 0.0025377991260029376, "epoch": 0.07046649050044042, "grad_norm": 0.11253459751605988, "learning_rate": 1e-06, "loss": 0.0297, "step": 755 }, { "clip_ratio/high_max": 0.0026100531249539927, "clip_ratio/high_mean": 0.0010582643808447756, "clip_ratio/low_mean": 0.0013642092417285312, "clip_ratio/low_min": 0.00015064558101585135, "clip_ratio/region_mean": 0.0024224735680036247, "epoch": 0.070559823600441, "grad_norm": 0.20744431018829346, "learning_rate": 1e-06, "loss": 0.0393, "step": 756 }, { "clip_ratio/high_max": 0.0028485467410064302, "clip_ratio/high_mean": 0.0012469643152144272, "clip_ratio/low_mean": 0.0015671485452912748, "clip_ratio/low_min": 0.00010396844572824193, "clip_ratio/region_mean": 0.002814112900523469, "epoch": 0.07065315670044159, "grad_norm": 0.2889653742313385, "learning_rate": 1e-06, "loss": 0.0215, "step": 757 }, { "clip_ratio/high_max": 0.0028358777635730803, "clip_ratio/high_mean": 0.0012443721734598512, "clip_ratio/low_mean": 0.0012220137596159475, "clip_ratio/low_min": 0.00017459284572396427, "clip_ratio/region_mean": 0.0024663859730935656, "epoch": 0.07074648980044217, "grad_norm": 0.12828651070594788, "learning_rate": 1e-06, "loss": -0.0126, "step": 758 }, { "clip_ratio/high_max": 0.0025452781628700905, "clip_ratio/high_mean": 0.0011796855469583534, "clip_ratio/low_mean": 0.0013494804843503516, "clip_ratio/low_min": 1.7399777789250948e-05, "clip_ratio/region_mean": 0.002529166034946684, "epoch": 0.07083982290044274, "grad_norm": 0.2257552295923233, "learning_rate": 1e-06, "loss": -0.0108, "step": 759 }, { "clip_ratio/high_max": 0.002919017497333698, "clip_ratio/high_mean": 0.0012375586229609326, "clip_ratio/low_mean": 0.0011128117503176327, "clip_ratio/low_min": 4.926426299789455e-05, "clip_ratio/region_mean": 0.002350370370550081, "epoch": 0.07093315600044334, "grad_norm": 0.12018465995788574, "learning_rate": 1e-06, "loss": -0.024, "step": 760 }, { "clip_ratio/high_max": 0.0029345636794460006, "clip_ratio/high_mean": 0.001132534132921137, "clip_ratio/low_mean": 0.001423005018295953, "clip_ratio/low_min": 6.205457702890271e-05, "clip_ratio/region_mean": 0.0025555391621310264, "epoch": 0.07102648910044392, "grad_norm": 0.13740438222885132, "learning_rate": 1e-06, "loss": 0.0328, "step": 761 }, { "clip_ratio/high_max": 0.002372321981965797, "clip_ratio/high_mean": 0.0010106234876730014, "clip_ratio/low_mean": 0.0014293776803242508, "clip_ratio/low_min": 0.00011450143574620597, "clip_ratio/region_mean": 0.0024400011243415065, "epoch": 0.0711198222004445, "grad_norm": 0.13104663789272308, "learning_rate": 1e-06, "loss": 0.0532, "step": 762 }, { "clip_ratio/high_max": 0.002375677169766277, "clip_ratio/high_mean": 0.0009800430125324056, "clip_ratio/low_mean": 0.0015799014654476196, "clip_ratio/low_min": 4.874952355748974e-05, "clip_ratio/region_mean": 0.0025599445070838556, "epoch": 0.07121315530044509, "grad_norm": 0.7211567163467407, "learning_rate": 1e-06, "loss": 0.0282, "step": 763 }, { "clip_ratio/high_max": 0.002381478676397819, "clip_ratio/high_mean": 0.0008929906034609303, "clip_ratio/low_mean": 0.0019658247110783122, "clip_ratio/low_min": 0.00013837007372785592, "clip_ratio/region_mean": 0.0028588152999873273, "epoch": 0.07130648840044566, "grad_norm": 96.26534271240234, "learning_rate": 1e-06, "loss": 0.0913, "step": 764 }, { "clip_ratio/high_max": 0.0024116245622280985, "clip_ratio/high_mean": 0.0009714016705402173, "clip_ratio/low_mean": 0.0014594723106711172, "clip_ratio/low_min": 0.00010956234382319963, "clip_ratio/region_mean": 0.0024308740903506987, "epoch": 0.07139982150044624, "grad_norm": 0.12967278063297272, "learning_rate": 1e-06, "loss": 0.0719, "step": 765 }, { "clip_ratio/high_max": 0.0028174537801533006, "clip_ratio/high_mean": 0.0010660446296242299, "clip_ratio/low_mean": 0.0017296833102591336, "clip_ratio/low_min": 0.0003092023334829719, "clip_ratio/region_mean": 0.002795727923512459, "epoch": 0.07149315460044683, "grad_norm": 497.11798095703125, "learning_rate": 1e-06, "loss": 0.1236, "step": 766 }, { "clip_ratio/high_max": 0.002451124135404825, "clip_ratio/high_mean": 0.0010397819241916295, "clip_ratio/low_mean": 0.0014246764876588713, "clip_ratio/low_min": 0.0001576373515490559, "clip_ratio/region_mean": 0.0024644585282658227, "epoch": 0.07158648770044741, "grad_norm": 0.10416387766599655, "learning_rate": 1e-06, "loss": 0.0125, "step": 767 }, { "clip_ratio/high_max": 0.0029229553474579006, "clip_ratio/high_mean": 0.0010960493018501438, "clip_ratio/low_mean": 0.0013133260436006822, "clip_ratio/low_min": 0.00013450737151288195, "clip_ratio/region_mean": 0.0024093753963825293, "epoch": 0.071679820800448, "grad_norm": 0.11774514615535736, "learning_rate": 1e-06, "loss": -0.0233, "step": 768 }, { "clip_ratio/high_max": 0.0026815063756657764, "clip_ratio/high_mean": 0.0012512667854025494, "clip_ratio/low_mean": 0.0013472000537149142, "clip_ratio/low_min": 2.5557144908816554e-05, "clip_ratio/region_mean": 0.002598466795461718, "epoch": 0.07177315390044858, "grad_norm": 0.13921667635440826, "learning_rate": 1e-06, "loss": -0.0075, "step": 769 }, { "clip_ratio/high_max": 0.004260317735315766, "clip_ratio/high_mean": 0.0013174217856430914, "clip_ratio/low_mean": 0.0015055412623041775, "clip_ratio/low_min": 0.00014331521379062906, "clip_ratio/region_mean": 0.0028229629970155656, "epoch": 0.07186648700044916, "grad_norm": 0.15411517024040222, "learning_rate": 1e-06, "loss": 0.0181, "step": 770 }, { "clip_ratio/high_max": 0.002614982266095467, "clip_ratio/high_mean": 0.0010318012627976714, "clip_ratio/low_mean": 0.0013159202098904643, "clip_ratio/low_min": 0.00010681984804250533, "clip_ratio/region_mean": 0.0023477214708691463, "epoch": 0.07195982010044975, "grad_norm": 1.2771843671798706, "learning_rate": 1e-06, "loss": 0.0063, "step": 771 }, { "clip_ratio/high_max": 0.002211797567724716, "clip_ratio/high_mean": 0.001017159789626021, "clip_ratio/low_mean": 0.0014056354193598963, "clip_ratio/low_min": 0.0001349385165667627, "clip_ratio/region_mean": 0.002422795172606129, "epoch": 0.07205315320045033, "grad_norm": 0.11526695638895035, "learning_rate": 1e-06, "loss": 0.0144, "step": 772 }, { "clip_ratio/high_max": 0.0025003318587550893, "clip_ratio/high_mean": 0.0009228111994161736, "clip_ratio/low_mean": 0.0013565575682150666, "clip_ratio/low_min": 7.00051487001474e-05, "clip_ratio/region_mean": 0.002279368774907198, "epoch": 0.07214648630045091, "grad_norm": 0.3818627893924713, "learning_rate": 1e-06, "loss": 0.0552, "step": 773 }, { "clip_ratio/high_max": 0.0024978064247989096, "clip_ratio/high_mean": 0.0011297078963252716, "clip_ratio/low_mean": 0.0014146217872621492, "clip_ratio/low_min": 0.0001026806694426341, "clip_ratio/region_mean": 0.002544329712691251, "epoch": 0.0722398194004515, "grad_norm": 0.3097515106201172, "learning_rate": 1e-06, "loss": -0.0067, "step": 774 }, { "clip_ratio/high_max": 0.0025603243775549345, "clip_ratio/high_mean": 0.0010167901345994323, "clip_ratio/low_mean": 0.0016238366915786173, "clip_ratio/low_min": 0.0001610846957191825, "clip_ratio/region_mean": 0.002640626873471774, "epoch": 0.07233315250045208, "grad_norm": 0.21705465018749237, "learning_rate": 1e-06, "loss": 0.0081, "step": 775 }, { "clip_ratio/high_max": 0.0023104565625544637, "clip_ratio/high_mean": 0.0010679982751753414, "clip_ratio/low_mean": 0.0016751723960624076, "clip_ratio/low_min": 6.190402200445533e-05, "clip_ratio/region_mean": 0.0027431706548668444, "epoch": 0.07242648560045266, "grad_norm": 0.12319537997245789, "learning_rate": 1e-06, "loss": 0.0338, "step": 776 }, { "clip_ratio/high_max": 0.002255221043014899, "clip_ratio/high_mean": 0.0009452218710066518, "clip_ratio/low_mean": 0.0014244780504668597, "clip_ratio/low_min": 0.00010671240033843787, "clip_ratio/region_mean": 0.002369699941482395, "epoch": 0.07251981870045325, "grad_norm": 1.0070823431015015, "learning_rate": 1e-06, "loss": -0.0003, "step": 777 }, { "clip_ratio/high_max": 0.002453761058859527, "clip_ratio/high_mean": 0.000986099930742057, "clip_ratio/low_mean": 0.0012249174433236476, "clip_ratio/low_min": 0.0001155388217739528, "clip_ratio/region_mean": 0.0022110174104454927, "epoch": 0.07261315180045383, "grad_norm": 0.12033914774656296, "learning_rate": 1e-06, "loss": 0.0436, "step": 778 }, { "clip_ratio/high_max": 0.0027256453977315687, "clip_ratio/high_mean": 0.0010520487849134952, "clip_ratio/low_mean": 0.001619505444978131, "clip_ratio/low_min": 0.00022553786675416632, "clip_ratio/region_mean": 0.0026715542189776897, "epoch": 0.07270648490045442, "grad_norm": 0.12250439077615738, "learning_rate": 1e-06, "loss": 0.0564, "step": 779 }, { "clip_ratio/high_max": 0.002887733295210637, "clip_ratio/high_mean": 0.0011350037711963523, "clip_ratio/low_mean": 0.0013143059986759908, "clip_ratio/low_min": 0.00012444751882867422, "clip_ratio/region_mean": 0.0024493097662343644, "epoch": 0.072799818000455, "grad_norm": 0.5759520530700684, "learning_rate": 1e-06, "loss": 0.0125, "step": 780 }, { "clip_ratio/high_max": 0.0029090345196891576, "clip_ratio/high_mean": 0.0011424010845075827, "clip_ratio/low_mean": 0.0016882859526958782, "clip_ratio/low_min": 0.00017925473457580665, "clip_ratio/region_mean": 0.002830687000823673, "epoch": 0.07289315110045558, "grad_norm": 83.99930572509766, "learning_rate": 1e-06, "loss": 0.8566, "step": 781 }, { "clip_ratio/high_max": 0.0022162762797961477, "clip_ratio/high_mean": 0.001064435762600624, "clip_ratio/low_mean": 0.0015534348785877228, "clip_ratio/low_min": 0.0001091413214453496, "clip_ratio/region_mean": 0.002617870683025103, "epoch": 0.07298648420045617, "grad_norm": 24.931503295898438, "learning_rate": 1e-06, "loss": 0.0041, "step": 782 }, { "clip_ratio/high_max": 0.0022471648298960645, "clip_ratio/high_mean": 0.0008939484578149859, "clip_ratio/low_mean": 0.001784996555215912, "clip_ratio/low_min": 0.0001075128311640583, "clip_ratio/region_mean": 0.0026789450348587707, "epoch": 0.07307981730045675, "grad_norm": 4.502617835998535, "learning_rate": 1e-06, "loss": 0.0676, "step": 783 }, { "clip_ratio/high_max": 0.002785466807836201, "clip_ratio/high_mean": 0.0010771551606012508, "clip_ratio/low_mean": 0.0012221929100633133, "clip_ratio/low_min": 0.00016796332602098119, "clip_ratio/region_mean": 0.0022993480597506277, "epoch": 0.07317315040045733, "grad_norm": 0.12705950438976288, "learning_rate": 1e-06, "loss": 0.0474, "step": 784 }, { "clip_ratio/high_max": 0.0023336016020039096, "clip_ratio/high_mean": 0.0010187950792897027, "clip_ratio/low_mean": 0.0013977449234516826, "clip_ratio/low_min": 0.00021942931198282167, "clip_ratio/region_mean": 0.0024165400172933005, "epoch": 0.07326648350045792, "grad_norm": 3.4466166496276855, "learning_rate": 1e-06, "loss": 0.0021, "step": 785 }, { "clip_ratio/high_max": 0.0023920931489556096, "clip_ratio/high_mean": 0.0009995540422096383, "clip_ratio/low_mean": 0.0014322755196189974, "clip_ratio/low_min": 0.0001961415673576994, "clip_ratio/region_mean": 0.0024318295982084237, "epoch": 0.0733598166004585, "grad_norm": 8375429.0, "learning_rate": 1e-06, "loss": 1643.3694, "step": 786 }, { "clip_ratio/high_max": 0.0022965621028561145, "clip_ratio/high_mean": 0.0010004942741943523, "clip_ratio/low_mean": 0.0015754296036902815, "clip_ratio/low_min": 0.0003203391970600933, "clip_ratio/region_mean": 0.00257592395064421, "epoch": 0.07345314970045909, "grad_norm": 0.21115601062774658, "learning_rate": 1e-06, "loss": 0.044, "step": 787 }, { "clip_ratio/high_max": 0.0022607045466429554, "clip_ratio/high_mean": 0.000918513280339539, "clip_ratio/low_mean": 0.0014542408353008796, "clip_ratio/low_min": 1.2921232155349571e-05, "clip_ratio/region_mean": 0.0023727541556581855, "epoch": 0.07354648280045967, "grad_norm": 47.85354232788086, "learning_rate": 1e-06, "loss": 0.0244, "step": 788 }, { "clip_ratio/high_max": 0.0024737106941756792, "clip_ratio/high_mean": 0.0010195445211138576, "clip_ratio/low_mean": 0.0013340751465875655, "clip_ratio/low_min": 0.00010331808425689815, "clip_ratio/region_mean": 0.0023536196604254656, "epoch": 0.07363981590046025, "grad_norm": 215.67294311523438, "learning_rate": 1e-06, "loss": 0.2172, "step": 789 }, { "clip_ratio/high_max": 0.0023018523133941926, "clip_ratio/high_mean": 0.001017206803226145, "clip_ratio/low_mean": 0.0013939339041826315, "clip_ratio/low_min": 0.00019272493318567285, "clip_ratio/region_mean": 0.0024111407255986705, "epoch": 0.07373314900046084, "grad_norm": 0.12516115605831146, "learning_rate": 1e-06, "loss": 0.0358, "step": 790 }, { "clip_ratio/high_max": 0.002252256228530314, "clip_ratio/high_mean": 0.0009084639605134726, "clip_ratio/low_mean": 0.0011517660204845015, "clip_ratio/low_min": 7.832294795662165e-05, "clip_ratio/region_mean": 0.002060229948256165, "epoch": 0.07382648210046142, "grad_norm": 0.1127663180232048, "learning_rate": 1e-06, "loss": 0.0354, "step": 791 }, { "clip_ratio/high_max": 0.002542590729717631, "clip_ratio/high_mean": 0.0010722878650994971, "clip_ratio/low_mean": 0.0013257544924272224, "clip_ratio/low_min": 4.888391231361311e-05, "clip_ratio/region_mean": 0.0023980423211469315, "epoch": 0.073919815200462, "grad_norm": 0.0983455628156662, "learning_rate": 1e-06, "loss": -0.0185, "step": 792 }, { "clip_ratio/high_max": 0.0027854084328282624, "clip_ratio/high_mean": 0.001093721828510752, "clip_ratio/low_mean": 0.001555481219838839, "clip_ratio/low_min": 0.0002449233388688299, "clip_ratio/region_mean": 0.0026492030228837393, "epoch": 0.07401314830046259, "grad_norm": 0.12017360329627991, "learning_rate": 1e-06, "loss": 0.0532, "step": 793 }, { "clip_ratio/high_max": 0.0026098071502929088, "clip_ratio/high_mean": 0.0009595419160177698, "clip_ratio/low_mean": 0.0013041621168667916, "clip_ratio/low_min": 4.399183399073081e-05, "clip_ratio/region_mean": 0.0022637040383415297, "epoch": 0.07410648140046316, "grad_norm": 0.10721056908369064, "learning_rate": 1e-06, "loss": 0.0017, "step": 794 }, { "clip_ratio/high_max": 0.0022860275639686733, "clip_ratio/high_mean": 0.001027212001645239, "clip_ratio/low_mean": 0.0014301472438091878, "clip_ratio/low_min": 0.00010816502617672086, "clip_ratio/region_mean": 0.002457359281834215, "epoch": 0.07419981450046374, "grad_norm": 0.111719511449337, "learning_rate": 1e-06, "loss": 0.0026, "step": 795 }, { "clip_ratio/high_max": 0.002570167423982639, "clip_ratio/high_mean": 0.0010109798404300818, "clip_ratio/low_mean": 0.0015996165020624176, "clip_ratio/low_min": 0.00018007378275797237, "clip_ratio/region_mean": 0.0026105962897418067, "epoch": 0.07429314760046433, "grad_norm": 0.12304951250553131, "learning_rate": 1e-06, "loss": 0.0082, "step": 796 }, { "clip_ratio/high_max": 0.0025197822105837986, "clip_ratio/high_mean": 0.0011801023247244302, "clip_ratio/low_mean": 0.0011522752756718546, "clip_ratio/low_min": 9.680084076535422e-05, "clip_ratio/region_mean": 0.0023323775531025603, "epoch": 0.07438648070046491, "grad_norm": 0.11943760514259338, "learning_rate": 1e-06, "loss": -0.0319, "step": 797 }, { "clip_ratio/high_max": 0.0021831479825777933, "clip_ratio/high_mean": 0.0010424584634165512, "clip_ratio/low_mean": 0.0015146094701776747, "clip_ratio/low_min": 0.0001680079658399336, "clip_ratio/region_mean": 0.0025570679135853425, "epoch": 0.0744798138004655, "grad_norm": 0.12011537700891495, "learning_rate": 1e-06, "loss": 0.0519, "step": 798 }, { "clip_ratio/high_max": 0.002574672755145002, "clip_ratio/high_mean": 0.000999525287625147, "clip_ratio/low_mean": 0.0013867078923794907, "clip_ratio/low_min": 9.164701805275399e-05, "clip_ratio/region_mean": 0.002386233158176765, "epoch": 0.07457314690046608, "grad_norm": 0.1203724592924118, "learning_rate": 1e-06, "loss": 0.0265, "step": 799 }, { "clip_ratio/high_max": 0.002303191984537989, "clip_ratio/high_mean": 0.0009787777307792567, "clip_ratio/low_mean": 0.0013409042840066832, "clip_ratio/low_min": 8.923406130634248e-05, "clip_ratio/region_mean": 0.0023196820038720034, "epoch": 0.07466648000046666, "grad_norm": 0.1313478797674179, "learning_rate": 1e-06, "loss": 0.0211, "step": 800 }, { "clip_ratio/high_max": 0.008327030911459588, "clip_ratio/high_mean": 0.0036666516316472553, "clip_ratio/low_mean": 0.002788988291285932, "clip_ratio/low_min": 0.0002465945744916098, "clip_ratio/region_mean": 0.006455639857449569, "completions/clipped_ratio": 0.013139997209821397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 605.9010009765625, "completions/mean_terminated_length": 559.430419921875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.07475981310046725, "grad_norm": 101064.3125, "learning_rate": 1e-06, "loss": 1733865.5, "num_tokens": 651537526.0, "reward": 0.5855277180671692, "reward_std": 0.19071511924266815, "rewards/simpleverify_reward/mean": 0.5855277180671692, "rewards/simpleverify_reward/std": 0.4926328957080841, "step": 801 }, { "clip_ratio/high_max": 0.0035391675191931427, "clip_ratio/high_mean": 0.0014897661749273539, "clip_ratio/low_mean": 0.00189780623986735, "clip_ratio/low_min": 0.00025063848806894384, "clip_ratio/region_mean": 0.003387572374776937, "epoch": 0.07485314620046783, "grad_norm": 19597.63671875, "learning_rate": 1e-06, "loss": 7289.4453, "step": 802 }, { "clip_ratio/high_max": 0.002509581536287442, "clip_ratio/high_mean": 0.0011021544814866502, "clip_ratio/low_mean": 0.001305746194702806, "clip_ratio/low_min": 6.73087697578012e-05, "clip_ratio/region_mean": 0.0024079006907413714, "epoch": 0.07494647930046841, "grad_norm": 23015.837890625, "learning_rate": 1e-06, "loss": 5.4413, "step": 803 }, { "clip_ratio/high_max": 0.002721167569688987, "clip_ratio/high_mean": 0.0011376955735613592, "clip_ratio/low_mean": 0.0014100532171141822, "clip_ratio/low_min": 0.00012447980952856597, "clip_ratio/region_mean": 0.0025477488597971387, "epoch": 0.075039812400469, "grad_norm": 0.4986385107040405, "learning_rate": 1e-06, "loss": 0.0291, "step": 804 }, { "clip_ratio/high_max": 0.0027081956941401586, "clip_ratio/high_mean": 0.0010714517775340937, "clip_ratio/low_mean": 0.0011555775909073418, "clip_ratio/low_min": 8.060287655098364e-05, "clip_ratio/region_mean": 0.0022270293193287216, "epoch": 0.07513314550046958, "grad_norm": 0.12783238291740417, "learning_rate": 1e-06, "loss": 0.038, "step": 805 }, { "clip_ratio/high_max": 0.002583015761047136, "clip_ratio/high_mean": 0.0010607417025312316, "clip_ratio/low_mean": 0.0012262309246580116, "clip_ratio/low_min": 0.0001517219661764102, "clip_ratio/region_mean": 0.0022869726381031796, "epoch": 0.07522647860047016, "grad_norm": 33.71275329589844, "learning_rate": 1e-06, "loss": 0.1202, "step": 806 }, { "clip_ratio/high_max": 0.002716500141104916, "clip_ratio/high_mean": 0.0010896660460275598, "clip_ratio/low_mean": 0.0012519440861069597, "clip_ratio/low_min": 0.00012644107846426778, "clip_ratio/region_mean": 0.0023416101685143076, "epoch": 0.07531981170047075, "grad_norm": 69.53738403320312, "learning_rate": 1e-06, "loss": 0.0335, "step": 807 }, { "clip_ratio/high_max": 0.0027018263426725753, "clip_ratio/high_mean": 0.001221783459186554, "clip_ratio/low_mean": 0.0010398273025202798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002261610767163802, "epoch": 0.07541314480047133, "grad_norm": 0.14749875664710999, "learning_rate": 1e-06, "loss": -0.0101, "step": 808 }, { "clip_ratio/high_max": 0.00294557467350387, "clip_ratio/high_mean": 0.0012184558254375588, "clip_ratio/low_mean": 0.0014991872558312025, "clip_ratio/low_min": 5.789188253402244e-05, "clip_ratio/region_mean": 0.002717643088544719, "epoch": 0.07550647790047192, "grad_norm": 1811309.125, "learning_rate": 1e-06, "loss": 249.5219, "step": 809 }, { "clip_ratio/high_max": 0.002644067702931352, "clip_ratio/high_mean": 0.0011352808687661309, "clip_ratio/low_mean": 0.0013471494676196016, "clip_ratio/low_min": 0.00022321876895148307, "clip_ratio/region_mean": 0.0024824303327477537, "epoch": 0.0755998110004725, "grad_norm": 55.80194091796875, "learning_rate": 1e-06, "loss": 0.0318, "step": 810 }, { "clip_ratio/high_max": 0.0026288292938261293, "clip_ratio/high_mean": 0.0010290294430888025, "clip_ratio/low_mean": 0.0014469246525550261, "clip_ratio/low_min": 7.32876205802313e-05, "clip_ratio/region_mean": 0.002475954075634945, "epoch": 0.07569314410047308, "grad_norm": 1.3655775785446167, "learning_rate": 1e-06, "loss": 0.0674, "step": 811 }, { "clip_ratio/high_max": 0.002630128357850481, "clip_ratio/high_mean": 0.001060525046341354, "clip_ratio/low_mean": 0.001312859483732609, "clip_ratio/low_min": 0.0001577770426592906, "clip_ratio/region_mean": 0.002373384493694175, "epoch": 0.07578647720047367, "grad_norm": 1.169458031654358, "learning_rate": 1e-06, "loss": 0.0438, "step": 812 }, { "clip_ratio/high_max": 0.002709727705223486, "clip_ratio/high_mean": 0.0011132797808386385, "clip_ratio/low_mean": 0.0013109297105984297, "clip_ratio/low_min": 0.00010995767479471397, "clip_ratio/region_mean": 0.0024242095387307927, "epoch": 0.07587981030047425, "grad_norm": 0.12129896134138107, "learning_rate": 1e-06, "loss": 0.0113, "step": 813 }, { "clip_ratio/high_max": 0.0026854495008592494, "clip_ratio/high_mean": 0.0011012501836376032, "clip_ratio/low_mean": 0.001404240909323562, "clip_ratio/low_min": 0.00014764164734515361, "clip_ratio/region_mean": 0.0025054911166080274, "epoch": 0.07597314340047483, "grad_norm": 0.2171059250831604, "learning_rate": 1e-06, "loss": 0.0551, "step": 814 }, { "clip_ratio/high_max": 0.002810167999996338, "clip_ratio/high_mean": 0.0011666692116705235, "clip_ratio/low_mean": 0.0014704880968565703, "clip_ratio/low_min": 0.00014680441290693125, "clip_ratio/region_mean": 0.0026371573185315356, "epoch": 0.07606647650047542, "grad_norm": 0.165121927857399, "learning_rate": 1e-06, "loss": 0.0491, "step": 815 }, { "clip_ratio/high_max": 0.0034815813341992907, "clip_ratio/high_mean": 0.0013558651589846704, "clip_ratio/low_mean": 0.001244781360583147, "clip_ratio/low_min": 8.153441740432754e-05, "clip_ratio/region_mean": 0.0026006465195678174, "epoch": 0.076159809600476, "grad_norm": 0.1426796317100525, "learning_rate": 1e-06, "loss": -0.047, "step": 816 }, { "clip_ratio/high_max": 0.0026250017108395696, "clip_ratio/high_mean": 0.0011445680211181752, "clip_ratio/low_mean": 0.0012474640061554965, "clip_ratio/low_min": 0.00010768878382805269, "clip_ratio/region_mean": 0.002392031987255905, "epoch": 0.07625314270047658, "grad_norm": 0.14920775592327118, "learning_rate": 1e-06, "loss": 0.0132, "step": 817 }, { "clip_ratio/high_max": 0.002687466454517562, "clip_ratio/high_mean": 0.0011310209883959033, "clip_ratio/low_mean": 0.001391334462823579, "clip_ratio/low_min": 0.00012124189561291132, "clip_ratio/region_mean": 0.0025223554475815035, "epoch": 0.07634647580047717, "grad_norm": 0.10995069146156311, "learning_rate": 1e-06, "loss": 0.0018, "step": 818 }, { "clip_ratio/high_max": 0.002700510769500397, "clip_ratio/high_mean": 0.0009833559070102638, "clip_ratio/low_mean": 0.001493043004302308, "clip_ratio/low_min": 5.635131128656212e-05, "clip_ratio/region_mean": 0.0024763989058556035, "epoch": 0.07643980890047775, "grad_norm": 0.1840856671333313, "learning_rate": 1e-06, "loss": 0.0847, "step": 819 }, { "clip_ratio/high_max": 0.0029620448403875344, "clip_ratio/high_mean": 0.001259825916349655, "clip_ratio/low_mean": 0.0013704102639167104, "clip_ratio/low_min": 3.171020762238186e-05, "clip_ratio/region_mean": 0.0026302362311980687, "epoch": 0.07653314200047834, "grad_norm": 0.14069418609142303, "learning_rate": 1e-06, "loss": -0.0035, "step": 820 }, { "clip_ratio/high_max": 0.0025293509315815754, "clip_ratio/high_mean": 0.001065507911334862, "clip_ratio/low_mean": 0.0012286804376344662, "clip_ratio/low_min": 6.277704142121365e-05, "clip_ratio/region_mean": 0.0022941883289604448, "epoch": 0.07662647510047892, "grad_norm": 0.11177564412355423, "learning_rate": 1e-06, "loss": 0.0459, "step": 821 }, { "clip_ratio/high_max": 0.0022356047338689677, "clip_ratio/high_mean": 0.0010488571224414045, "clip_ratio/low_mean": 0.0014486389045487158, "clip_ratio/low_min": 0.00027421891172707547, "clip_ratio/region_mean": 0.0024974959524115548, "epoch": 0.0767198082004795, "grad_norm": 0.7017294764518738, "learning_rate": 1e-06, "loss": 0.0433, "step": 822 }, { "clip_ratio/high_max": 0.0027817498448712286, "clip_ratio/high_mean": 0.0011642079134617234, "clip_ratio/low_mean": 0.001469093287596479, "clip_ratio/low_min": 9.905035858537303e-05, "clip_ratio/region_mean": 0.002633301235619001, "epoch": 0.07681314130048009, "grad_norm": 32.07141876220703, "learning_rate": 1e-06, "loss": 0.0499, "step": 823 }, { "clip_ratio/high_max": 0.0024185820075217634, "clip_ratio/high_mean": 0.0010990880546160042, "clip_ratio/low_mean": 0.0013953841153124813, "clip_ratio/low_min": 0.0001409956985298777, "clip_ratio/region_mean": 0.002494472180842422, "epoch": 0.07690647440048066, "grad_norm": 0.13829414546489716, "learning_rate": 1e-06, "loss": 0.0312, "step": 824 }, { "clip_ratio/high_max": 0.00220758964860579, "clip_ratio/high_mean": 0.0010455196133989375, "clip_ratio/low_mean": 0.0016037230016081594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002649242691404652, "epoch": 0.07699980750048124, "grad_norm": 0.35364866256713867, "learning_rate": 1e-06, "loss": 0.0271, "step": 825 }, { "clip_ratio/high_max": 0.002517393382731825, "clip_ratio/high_mean": 0.0010453175491420552, "clip_ratio/low_mean": 0.0015536792861894355, "clip_ratio/low_min": 0.00026197274837613804, "clip_ratio/region_mean": 0.002598996870801784, "epoch": 0.07709314060048184, "grad_norm": 1.539804220199585, "learning_rate": 1e-06, "loss": 0.0404, "step": 826 }, { "clip_ratio/high_max": 0.002512122860935051, "clip_ratio/high_mean": 0.0009638931824156316, "clip_ratio/low_mean": 0.0013821202337567229, "clip_ratio/low_min": 0.0001671765585342655, "clip_ratio/region_mean": 0.002346013432543259, "epoch": 0.07718647370048241, "grad_norm": 0.10920978337526321, "learning_rate": 1e-06, "loss": 0.0172, "step": 827 }, { "clip_ratio/high_max": 0.0025429152592550963, "clip_ratio/high_mean": 0.0011168379787704907, "clip_ratio/low_mean": 0.0011847098503494635, "clip_ratio/low_min": 1.5111218999663834e-05, "clip_ratio/region_mean": 0.002301547836395912, "epoch": 0.07727980680048299, "grad_norm": 6.9402337074279785, "learning_rate": 1e-06, "loss": 0.014, "step": 828 }, { "clip_ratio/high_max": 0.002954852840048261, "clip_ratio/high_mean": 0.0012403167311276775, "clip_ratio/low_mean": 0.001396391066009528, "clip_ratio/low_min": 0.000214487326957169, "clip_ratio/region_mean": 0.0026367077734903432, "epoch": 0.07737313990048358, "grad_norm": 0.11157985031604767, "learning_rate": 1e-06, "loss": -0.0154, "step": 829 }, { "clip_ratio/high_max": 0.002259429445985006, "clip_ratio/high_mean": 0.0009761795172380516, "clip_ratio/low_mean": 0.0015210828241833951, "clip_ratio/low_min": 1.156122834800044e-05, "clip_ratio/region_mean": 0.002497262386896182, "epoch": 0.07746647300048416, "grad_norm": 0.12391701340675354, "learning_rate": 1e-06, "loss": -0.0, "step": 830 }, { "clip_ratio/high_max": 0.0029618892367579974, "clip_ratio/high_mean": 0.001162765456683701, "clip_ratio/low_mean": 0.0018380040055490099, "clip_ratio/low_min": 4.736358823720366e-05, "clip_ratio/region_mean": 0.003000769480422605, "epoch": 0.07755980610048475, "grad_norm": 0.6005366444587708, "learning_rate": 1e-06, "loss": 0.0438, "step": 831 }, { "clip_ratio/high_max": 0.002424318459816277, "clip_ratio/high_mean": 0.0009576944721629843, "clip_ratio/low_mean": 0.0013155195047147572, "clip_ratio/low_min": 0.00012579851136251818, "clip_ratio/region_mean": 0.002273214020533487, "epoch": 0.07765313920048533, "grad_norm": 0.09429342299699783, "learning_rate": 1e-06, "loss": 0.0149, "step": 832 }, { "clip_ratio/high_max": 0.0026715394415077753, "clip_ratio/high_mean": 0.0010084235873364378, "clip_ratio/low_mean": 0.001314596220254316, "clip_ratio/low_min": 0.00011282059494988061, "clip_ratio/region_mean": 0.002323019791219849, "epoch": 0.07774647230048591, "grad_norm": 0.10347605496644974, "learning_rate": 1e-06, "loss": 0.0141, "step": 833 }, { "clip_ratio/high_max": 0.0024801087674859446, "clip_ratio/high_mean": 0.0009608512318663998, "clip_ratio/low_mean": 0.0013840262654412072, "clip_ratio/low_min": 9.610293000150705e-05, "clip_ratio/region_mean": 0.002344877430004999, "epoch": 0.0778398054004865, "grad_norm": 0.0953790545463562, "learning_rate": 1e-06, "loss": 0.0427, "step": 834 }, { "clip_ratio/high_max": 0.0029015839827479795, "clip_ratio/high_mean": 0.0010931900651485194, "clip_ratio/low_mean": 0.001293820434511872, "clip_ratio/low_min": 8.856584463501349e-05, "clip_ratio/region_mean": 0.002387010506936349, "epoch": 0.07793313850048708, "grad_norm": 0.11080574244260788, "learning_rate": 1e-06, "loss": -0.0247, "step": 835 }, { "clip_ratio/high_max": 0.0023929066155687906, "clip_ratio/high_mean": 0.001115258888603421, "clip_ratio/low_mean": 0.0014133512413536664, "clip_ratio/low_min": 6.461722387030022e-05, "clip_ratio/region_mean": 0.002528610159060918, "epoch": 0.07802647160048766, "grad_norm": 0.1153893694281578, "learning_rate": 1e-06, "loss": -0.0209, "step": 836 }, { "clip_ratio/high_max": 0.0022937074827495962, "clip_ratio/high_mean": 0.0010396977222626447, "clip_ratio/low_mean": 0.0012711027175100753, "clip_ratio/low_min": 3.580635893740691e-05, "clip_ratio/region_mean": 0.002310800460691098, "epoch": 0.07811980470048825, "grad_norm": 41.6697998046875, "learning_rate": 1e-06, "loss": 0.0717, "step": 837 }, { "clip_ratio/high_max": 0.002025997415330494, "clip_ratio/high_mean": 0.0008962771662481828, "clip_ratio/low_mean": 0.0011883129209309118, "clip_ratio/low_min": 1.5747040379210375e-05, "clip_ratio/region_mean": 0.002084590116282925, "epoch": 0.07821313780048883, "grad_norm": 0.1756000518798828, "learning_rate": 1e-06, "loss": 1.6431, "step": 838 }, { "clip_ratio/high_max": 0.0022138013337098528, "clip_ratio/high_mean": 0.0010341593697376084, "clip_ratio/low_mean": 0.0013606284592242446, "clip_ratio/low_min": 9.632536603021435e-05, "clip_ratio/region_mean": 0.002394787887169514, "epoch": 0.07830647090048942, "grad_norm": 0.247129425406456, "learning_rate": 1e-06, "loss": 0.0188, "step": 839 }, { "clip_ratio/high_max": 0.00232323283853475, "clip_ratio/high_mean": 0.0009981944422179367, "clip_ratio/low_mean": 0.0011628296851995401, "clip_ratio/low_min": 2.7746948035201058e-05, "clip_ratio/region_mean": 0.002161024145607371, "epoch": 0.07839980400049, "grad_norm": 0.10771391540765762, "learning_rate": 1e-06, "loss": 0.0204, "step": 840 }, { "clip_ratio/high_max": 0.0023740843244013377, "clip_ratio/high_mean": 0.0009991943079512566, "clip_ratio/low_mean": 0.0015144946592045017, "clip_ratio/low_min": 0.00012666471229749732, "clip_ratio/region_mean": 0.002513689032639377, "epoch": 0.07849313710049058, "grad_norm": 0.14446231722831726, "learning_rate": 1e-06, "loss": 0.0265, "step": 841 }, { "clip_ratio/high_max": 0.0024020333694352303, "clip_ratio/high_mean": 0.0009544108434056398, "clip_ratio/low_mean": 0.0013701232019229792, "clip_ratio/low_min": 5.891822092962684e-05, "clip_ratio/region_mean": 0.002324534085346386, "epoch": 0.07858647020049117, "grad_norm": 0.12900720536708832, "learning_rate": 1e-06, "loss": 0.0359, "step": 842 }, { "clip_ratio/high_max": 0.002759324728685897, "clip_ratio/high_mean": 0.001045010121742962, "clip_ratio/low_mean": 0.00148967353743501, "clip_ratio/low_min": 0.0001977697011170676, "clip_ratio/region_mean": 0.002534683677367866, "epoch": 0.07867980330049175, "grad_norm": 0.11388585716485977, "learning_rate": 1e-06, "loss": 0.0138, "step": 843 }, { "clip_ratio/high_max": 0.0022038313472876325, "clip_ratio/high_mean": 0.001007417988148518, "clip_ratio/low_mean": 0.001209892718179617, "clip_ratio/low_min": 9.760590819496429e-05, "clip_ratio/region_mean": 0.0022173106990521774, "epoch": 0.07877313640049233, "grad_norm": 0.13638710975646973, "learning_rate": 1e-06, "loss": -0.0179, "step": 844 }, { "clip_ratio/high_max": 0.0021493658314284403, "clip_ratio/high_mean": 0.0009132369596045464, "clip_ratio/low_mean": 0.0011534922778082546, "clip_ratio/low_min": 8.425274245382752e-05, "clip_ratio/region_mean": 0.0020667292701546103, "epoch": 0.07886646950049292, "grad_norm": 0.10076100379228592, "learning_rate": 1e-06, "loss": 0.0396, "step": 845 }, { "clip_ratio/high_max": 0.0025229732418665662, "clip_ratio/high_mean": 0.0010883177965297364, "clip_ratio/low_mean": 0.001250967357918853, "clip_ratio/low_min": 0.00013781040979665704, "clip_ratio/region_mean": 0.0023392851362586953, "epoch": 0.0789598026004935, "grad_norm": 0.11551445722579956, "learning_rate": 1e-06, "loss": 0.0064, "step": 846 }, { "clip_ratio/high_max": 0.0023669461043027695, "clip_ratio/high_mean": 0.0009316054456576239, "clip_ratio/low_mean": 0.0012426102512108628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002174215725972317, "epoch": 0.07905313570049408, "grad_norm": 0.11276073008775711, "learning_rate": 1e-06, "loss": 0.0209, "step": 847 }, { "clip_ratio/high_max": 0.002016357317188522, "clip_ratio/high_mean": 0.0008894437523849774, "clip_ratio/low_mean": 0.0014651426463387907, "clip_ratio/low_min": 4.2508221667958423e-05, "clip_ratio/region_mean": 0.002354586438741535, "epoch": 0.07914646880049467, "grad_norm": 0.1048005074262619, "learning_rate": 1e-06, "loss": 0.052, "step": 848 }, { "clip_ratio/high_max": 0.0025408468427485786, "clip_ratio/high_mean": 0.001019345876557054, "clip_ratio/low_mean": 0.0014447482353716623, "clip_ratio/low_min": 7.6112450187793e-05, "clip_ratio/region_mean": 0.002464094097376801, "epoch": 0.07923980190049525, "grad_norm": 0.1779157817363739, "learning_rate": 1e-06, "loss": 0.017, "step": 849 }, { "clip_ratio/high_max": 0.0025285063311457634, "clip_ratio/high_mean": 0.0010080963365908246, "clip_ratio/low_mean": 0.0011521328415255994, "clip_ratio/low_min": 5.75060412302264e-05, "clip_ratio/region_mean": 0.0021602292472380213, "epoch": 0.07933313500049584, "grad_norm": 0.1164686307311058, "learning_rate": 1e-06, "loss": 0.0365, "step": 850 }, { "clip_ratio/high_max": 0.002545826879213564, "clip_ratio/high_mean": 0.0011510423755680677, "clip_ratio/low_mean": 0.00121481356836739, "clip_ratio/low_min": 0.00014498043037747266, "clip_ratio/region_mean": 0.0023658558930037543, "epoch": 0.07942646810049642, "grad_norm": 0.10660649091005325, "learning_rate": 1e-06, "loss": -0.0008, "step": 851 }, { "clip_ratio/high_max": 0.002732219531026203, "clip_ratio/high_mean": 0.0011672659693431342, "clip_ratio/low_mean": 0.0013485058007063344, "clip_ratio/low_min": 0.00011175960844411748, "clip_ratio/region_mean": 0.002515771753678564, "epoch": 0.079519801200497, "grad_norm": 0.14384303987026215, "learning_rate": 1e-06, "loss": -0.0027, "step": 852 }, { "clip_ratio/high_max": 0.002412931527942419, "clip_ratio/high_mean": 0.000984912263447768, "clip_ratio/low_mean": 0.001190990034956485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021759023074992, "epoch": 0.07961313430049759, "grad_norm": 0.2093774378299713, "learning_rate": 1e-06, "loss": 0.0394, "step": 853 }, { "clip_ratio/high_max": 0.0021997795774950646, "clip_ratio/high_mean": 0.0008447825930488762, "clip_ratio/low_mean": 0.0010668647373677231, "clip_ratio/low_min": 5.8082654504687525e-05, "clip_ratio/region_mean": 0.0019116473049507476, "epoch": 0.07970646740049817, "grad_norm": 0.10855469107627869, "learning_rate": 1e-06, "loss": 0.0238, "step": 854 }, { "clip_ratio/high_max": 0.0021874937920074444, "clip_ratio/high_mean": 0.0008731650068511954, "clip_ratio/low_mean": 0.0013044651514064753, "clip_ratio/low_min": 0.0001932637478603283, "clip_ratio/region_mean": 0.0021776301946374588, "epoch": 0.07979980050049874, "grad_norm": 0.12426037341356277, "learning_rate": 1e-06, "loss": 0.0225, "step": 855 }, { "clip_ratio/high_max": 0.0026216671540169045, "clip_ratio/high_mean": 0.0009840668353717774, "clip_ratio/low_mean": 0.001274728088901611, "clip_ratio/low_min": 0.00019591110958572244, "clip_ratio/region_mean": 0.0022587949206354097, "epoch": 0.07989313360049934, "grad_norm": 0.12642495334148407, "learning_rate": 1e-06, "loss": 0.0216, "step": 856 }, { "clip_ratio/high_max": 0.002338920116017107, "clip_ratio/high_mean": 0.0009410486327396939, "clip_ratio/low_mean": 0.0012541345859062858, "clip_ratio/low_min": 6.535313696076628e-05, "clip_ratio/region_mean": 0.00219518318772316, "epoch": 0.07998646670049991, "grad_norm": 0.11876281350851059, "learning_rate": 1e-06, "loss": 0.0316, "step": 857 }, { "clip_ratio/high_max": 0.0022746940376237035, "clip_ratio/high_mean": 0.0010265095697832294, "clip_ratio/low_mean": 0.0011364503880031407, "clip_ratio/low_min": 6.768035109416815e-05, "clip_ratio/region_mean": 0.002162959979614243, "epoch": 0.08007979980050049, "grad_norm": 0.30983495712280273, "learning_rate": 1e-06, "loss": -0.0163, "step": 858 }, { "clip_ratio/high_max": 0.0020234958465152886, "clip_ratio/high_mean": 0.0009981917828554288, "clip_ratio/low_mean": 0.0012964901616214775, "clip_ratio/low_min": 0.00011418876238167286, "clip_ratio/region_mean": 0.0022946819008211605, "epoch": 0.08017313290050108, "grad_norm": 0.16812914609909058, "learning_rate": 1e-06, "loss": 0.0122, "step": 859 }, { "clip_ratio/high_max": 0.0021906105321249925, "clip_ratio/high_mean": 0.00092416816187324, "clip_ratio/low_mean": 0.0013499877786671277, "clip_ratio/low_min": 0.00013236531958682463, "clip_ratio/region_mean": 0.0022741559296264313, "epoch": 0.08026646600050166, "grad_norm": 0.10505940020084381, "learning_rate": 1e-06, "loss": 0.0525, "step": 860 }, { "clip_ratio/high_max": 0.0021604920984827913, "clip_ratio/high_mean": 0.0008747429728828138, "clip_ratio/low_mean": 0.001236018375493586, "clip_ratio/low_min": 6.750577631464694e-05, "clip_ratio/region_mean": 0.0021107612919877283, "epoch": 0.08035979910050225, "grad_norm": 0.11796213686466217, "learning_rate": 1e-06, "loss": 0.0107, "step": 861 }, { "clip_ratio/high_max": 0.0022350820872816257, "clip_ratio/high_mean": 0.0009030160654219799, "clip_ratio/low_mean": 0.001405017279466847, "clip_ratio/low_min": 0.0001911307881528046, "clip_ratio/region_mean": 0.002308033363078721, "epoch": 0.08045313220050283, "grad_norm": 0.12946298718452454, "learning_rate": 1e-06, "loss": 0.06, "step": 862 }, { "clip_ratio/high_max": 0.0022636927678831853, "clip_ratio/high_mean": 0.0009499923035036772, "clip_ratio/low_mean": 0.001223393981490517, "clip_ratio/low_min": 4.709288350568386e-05, "clip_ratio/region_mean": 0.0021733862449764274, "epoch": 0.08054646530050341, "grad_norm": 0.11522744596004486, "learning_rate": 1e-06, "loss": -0.0019, "step": 863 }, { "clip_ratio/high_max": 0.002344078846363118, "clip_ratio/high_mean": 0.0010036355979536893, "clip_ratio/low_mean": 0.0011440496382419951, "clip_ratio/low_min": 0.00010483691221452318, "clip_ratio/region_mean": 0.002147685234376695, "epoch": 0.080639798400504, "grad_norm": 0.10673494637012482, "learning_rate": 1e-06, "loss": -0.0162, "step": 864 }, { "clip_ratio/high_max": 0.001942062302987324, "clip_ratio/high_mean": 0.0008828175168673624, "clip_ratio/low_mean": 0.001247084503120277, "clip_ratio/low_min": 9.555863107379992e-05, "clip_ratio/region_mean": 0.0021299020008882508, "epoch": 0.08073313150050458, "grad_norm": 0.11050200462341309, "learning_rate": 1e-06, "loss": 0.0402, "step": 865 }, { "clip_ratio/high_max": 0.0023977595774340443, "clip_ratio/high_mean": 0.0009578078825143166, "clip_ratio/low_mean": 0.0013287999026942998, "clip_ratio/low_min": 6.0110775848443154e-05, "clip_ratio/region_mean": 0.002286607770656701, "epoch": 0.08082646460050516, "grad_norm": 0.11569397896528244, "learning_rate": 1e-06, "loss": 0.0594, "step": 866 }, { "clip_ratio/high_max": 0.002359185309614986, "clip_ratio/high_mean": 0.00098547071320354, "clip_ratio/low_mean": 0.0012392127791827079, "clip_ratio/low_min": 6.680681690340862e-05, "clip_ratio/region_mean": 0.00222468345600646, "epoch": 0.08091979770050575, "grad_norm": 0.13643470406532288, "learning_rate": 1e-06, "loss": 0.0266, "step": 867 }, { "clip_ratio/high_max": 0.002664159081177786, "clip_ratio/high_mean": 0.0011410006263758987, "clip_ratio/low_mean": 0.0013658201496582478, "clip_ratio/low_min": 0.00010390758052380988, "clip_ratio/region_mean": 0.002506820783310104, "epoch": 0.08101313080050633, "grad_norm": 0.11330758035182953, "learning_rate": 1e-06, "loss": 0.0466, "step": 868 }, { "clip_ratio/high_max": 0.0024894229354686104, "clip_ratio/high_mean": 0.0009335126342193689, "clip_ratio/low_mean": 0.0011458650660642888, "clip_ratio/low_min": 9.861582293524407e-05, "clip_ratio/region_mean": 0.0020793776930077, "epoch": 0.08110646390050691, "grad_norm": 0.10997138172388077, "learning_rate": 1e-06, "loss": 0.0371, "step": 869 }, { "clip_ratio/high_max": 0.00247035260690609, "clip_ratio/high_mean": 0.0009542091356706806, "clip_ratio/low_mean": 0.001411927652952727, "clip_ratio/low_min": 0.00012325794887146913, "clip_ratio/region_mean": 0.0023661367886234075, "epoch": 0.0811997970005075, "grad_norm": 0.6086474061012268, "learning_rate": 1e-06, "loss": 0.0503, "step": 870 }, { "clip_ratio/high_max": 0.002391709218500182, "clip_ratio/high_mean": 0.0010303383496648166, "clip_ratio/low_mean": 0.0012374086254567374, "clip_ratio/low_min": 0.00011426570927142166, "clip_ratio/region_mean": 0.002267746989673469, "epoch": 0.08129313010050808, "grad_norm": 0.20141176879405975, "learning_rate": 1e-06, "loss": 0.0318, "step": 871 }, { "clip_ratio/high_max": 0.002784316850011237, "clip_ratio/high_mean": 0.001004057063255459, "clip_ratio/low_mean": 0.0012415803412295645, "clip_ratio/low_min": 0.00020129359381826362, "clip_ratio/region_mean": 0.002245637442683801, "epoch": 0.08138646320050867, "grad_norm": 0.11298583447933197, "learning_rate": 1e-06, "loss": 0.0521, "step": 872 }, { "clip_ratio/high_max": 0.002325666748220101, "clip_ratio/high_mean": 0.0009547723821015097, "clip_ratio/low_mean": 0.0010779980766528752, "clip_ratio/low_min": 7.225132048915839e-05, "clip_ratio/region_mean": 0.002032770476944279, "epoch": 0.08147979630050925, "grad_norm": 0.10313452035188675, "learning_rate": 1e-06, "loss": 0.0098, "step": 873 }, { "clip_ratio/high_max": 0.0022977184162300546, "clip_ratio/high_mean": 0.0009579784327797825, "clip_ratio/low_mean": 0.0014666750066680834, "clip_ratio/low_min": 0.00013325582403922454, "clip_ratio/region_mean": 0.0024246534158010036, "epoch": 0.08157312940050983, "grad_norm": 0.11318870633840561, "learning_rate": 1e-06, "loss": 0.0191, "step": 874 }, { "clip_ratio/high_max": 0.0020165925234323367, "clip_ratio/high_mean": 0.0008996675678645261, "clip_ratio/low_mean": 0.0010485840903129429, "clip_ratio/low_min": 0.0001311471551161958, "clip_ratio/region_mean": 0.0019482515854178928, "epoch": 0.08166646250051042, "grad_norm": 0.24277649819850922, "learning_rate": 1e-06, "loss": 0.0427, "step": 875 }, { "clip_ratio/high_max": 0.002348750742385164, "clip_ratio/high_mean": 0.0010094275949086295, "clip_ratio/low_mean": 0.0011026529255104833, "clip_ratio/low_min": 0.00020930047594447387, "clip_ratio/region_mean": 0.0021120804667589255, "epoch": 0.081759795600511, "grad_norm": 0.18388061225414276, "learning_rate": 1e-06, "loss": 0.0057, "step": 876 }, { "clip_ratio/high_max": 0.002254111175716389, "clip_ratio/high_mean": 0.0009372075182909612, "clip_ratio/low_mean": 0.0012236012735229451, "clip_ratio/low_min": 2.2698384782415815e-05, "clip_ratio/region_mean": 0.002160808799089864, "epoch": 0.08185312870051158, "grad_norm": 0.44638508558273315, "learning_rate": 1e-06, "loss": 0.052, "step": 877 }, { "clip_ratio/high_max": 0.0022413329061237164, "clip_ratio/high_mean": 0.0008774938178248703, "clip_ratio/low_mean": 0.001086651798686944, "clip_ratio/low_min": 4.1848927139653824e-05, "clip_ratio/region_mean": 0.0019641455946839415, "epoch": 0.08194646180051217, "grad_norm": 0.10878467559814453, "learning_rate": 1e-06, "loss": 0.0729, "step": 878 }, { "clip_ratio/high_max": 0.0020848131462116726, "clip_ratio/high_mean": 0.0009510009604127845, "clip_ratio/low_mean": 0.0010740179859567434, "clip_ratio/low_min": 8.13105789347901e-05, "clip_ratio/region_mean": 0.002025018933636602, "epoch": 0.08203979490051275, "grad_norm": 0.10448770225048065, "learning_rate": 1e-06, "loss": 0.041, "step": 879 }, { "clip_ratio/high_max": 0.0026984763753716834, "clip_ratio/high_mean": 0.0009850375881796936, "clip_ratio/low_mean": 0.00127541557958466, "clip_ratio/low_min": 0.0001279512762266677, "clip_ratio/region_mean": 0.0022604531186516397, "epoch": 0.08213312800051333, "grad_norm": 0.1236085444688797, "learning_rate": 1e-06, "loss": 0.0666, "step": 880 }, { "clip_ratio/high_max": 0.002340782779356232, "clip_ratio/high_mean": 0.000971670230228483, "clip_ratio/low_mean": 0.001378974488034146, "clip_ratio/low_min": 9.547714034852106e-05, "clip_ratio/region_mean": 0.002350644746911712, "epoch": 0.08222646110051392, "grad_norm": 0.15762795507907867, "learning_rate": 1e-06, "loss": 0.0475, "step": 881 }, { "clip_ratio/high_max": 0.002312309210537933, "clip_ratio/high_mean": 0.0008442941016255645, "clip_ratio/low_mean": 0.0012366292176011484, "clip_ratio/low_min": 8.033882477320731e-05, "clip_ratio/region_mean": 0.0020809233174077235, "epoch": 0.0823197942005145, "grad_norm": 0.37249353528022766, "learning_rate": 1e-06, "loss": 0.0508, "step": 882 }, { "clip_ratio/high_max": 0.002722452103625983, "clip_ratio/high_mean": 0.0010439098841743544, "clip_ratio/low_mean": 0.0012929949116369244, "clip_ratio/low_min": 0.00016100466564239468, "clip_ratio/region_mean": 0.0023369047848973423, "epoch": 0.08241312730051509, "grad_norm": 0.11022623628377914, "learning_rate": 1e-06, "loss": 0.0354, "step": 883 }, { "clip_ratio/high_max": 0.0022517828037962317, "clip_ratio/high_mean": 0.0009518332735751756, "clip_ratio/low_mean": 0.0014104233232501429, "clip_ratio/low_min": 0.00014663810725323856, "clip_ratio/region_mean": 0.0023622565786354244, "epoch": 0.08250646040051567, "grad_norm": 0.1112028956413269, "learning_rate": 1e-06, "loss": 0.0155, "step": 884 }, { "clip_ratio/high_max": 0.0021193179054534994, "clip_ratio/high_mean": 0.0009566934895701706, "clip_ratio/low_mean": 0.0011692178995872382, "clip_ratio/low_min": 1.4643861504737288e-05, "clip_ratio/region_mean": 0.002125911407347303, "epoch": 0.08259979350051624, "grad_norm": 2.0604805946350098, "learning_rate": 1e-06, "loss": 0.0236, "step": 885 }, { "clip_ratio/high_max": 0.0026521035033511, "clip_ratio/high_mean": 0.0011164571224071551, "clip_ratio/low_mean": 0.0011642363642749842, "clip_ratio/low_min": 6.635881891270401e-05, "clip_ratio/region_mean": 0.002280693472130224, "epoch": 0.08269312660051684, "grad_norm": 0.1110633835196495, "learning_rate": 1e-06, "loss": 0.0288, "step": 886 }, { "clip_ratio/high_max": 0.0023651731098652817, "clip_ratio/high_mean": 0.0010694454467738979, "clip_ratio/low_mean": 0.0010925703354587313, "clip_ratio/low_min": 0.00010994618696713587, "clip_ratio/region_mean": 0.002162015822250396, "epoch": 0.08278645970051741, "grad_norm": 0.11572252959012985, "learning_rate": 1e-06, "loss": -0.0124, "step": 887 }, { "clip_ratio/high_max": 0.0020946152944816276, "clip_ratio/high_mean": 0.0008596991428930778, "clip_ratio/low_mean": 0.0014544814221153501, "clip_ratio/low_min": 0.00018069351790472865, "clip_ratio/region_mean": 0.002314180543180555, "epoch": 0.08287979280051799, "grad_norm": 0.10051079094409943, "learning_rate": 1e-06, "loss": 0.0728, "step": 888 }, { "clip_ratio/high_max": 0.0026639462521416135, "clip_ratio/high_mean": 0.0009583651408320293, "clip_ratio/low_mean": 0.0010621514811646193, "clip_ratio/low_min": 3.611003194237128e-05, "clip_ratio/region_mean": 0.0020205165637889877, "epoch": 0.08297312590051859, "grad_norm": 0.48735469579696655, "learning_rate": 1e-06, "loss": 0.0049, "step": 889 }, { "clip_ratio/high_max": 0.0024260861464426853, "clip_ratio/high_mean": 0.0010849874379346147, "clip_ratio/low_mean": 0.0011253281318204245, "clip_ratio/low_min": 9.578489925843314e-05, "clip_ratio/region_mean": 0.002210315542470198, "epoch": 0.08306645900051916, "grad_norm": 0.10304472595453262, "learning_rate": 1e-06, "loss": -0.0107, "step": 890 }, { "clip_ratio/high_max": 0.002215232787420973, "clip_ratio/high_mean": 0.0008943595203163568, "clip_ratio/low_mean": 0.0013783484700979898, "clip_ratio/low_min": 0.00017142461001640186, "clip_ratio/region_mean": 0.002272708032251103, "epoch": 0.08315979210051976, "grad_norm": 0.14530344307422638, "learning_rate": 1e-06, "loss": 0.0759, "step": 891 }, { "clip_ratio/high_max": 0.002144011094060261, "clip_ratio/high_mean": 0.0009147500486506033, "clip_ratio/low_mean": 0.001410330027283635, "clip_ratio/low_min": 0.00013872673389414558, "clip_ratio/region_mean": 0.0023250801168615, "epoch": 0.08325312520052033, "grad_norm": 0.1049463078379631, "learning_rate": 1e-06, "loss": 0.017, "step": 892 }, { "clip_ratio/high_max": 0.002171644187910715, "clip_ratio/high_mean": 0.0008838319972710451, "clip_ratio/low_mean": 0.0011495420785649912, "clip_ratio/low_min": 7.570941579615464e-05, "clip_ratio/region_mean": 0.002033374104939867, "epoch": 0.08334645830052091, "grad_norm": 0.11366205662488937, "learning_rate": 1e-06, "loss": 0.0225, "step": 893 }, { "clip_ratio/high_max": 0.0022619613155256957, "clip_ratio/high_mean": 0.0009261451195925474, "clip_ratio/low_mean": 0.0011723415009328164, "clip_ratio/low_min": 0.00023839016830606852, "clip_ratio/region_mean": 0.0020984866496291943, "epoch": 0.0834397914005215, "grad_norm": 0.1208035871386528, "learning_rate": 1e-06, "loss": 0.0788, "step": 894 }, { "clip_ratio/high_max": 0.0024429210934613366, "clip_ratio/high_mean": 0.001026021394864074, "clip_ratio/low_mean": 0.0012642092697205953, "clip_ratio/low_min": 0.00015144837198022287, "clip_ratio/region_mean": 0.0022902306882315315, "epoch": 0.08353312450052208, "grad_norm": 379.2685241699219, "learning_rate": 1e-06, "loss": 0.1067, "step": 895 }, { "clip_ratio/high_max": 0.0023735536306048743, "clip_ratio/high_mean": 0.0009185903909383342, "clip_ratio/low_mean": 0.0011832196032628417, "clip_ratio/low_min": 0.00012658051218750188, "clip_ratio/region_mean": 0.0021018100014771335, "epoch": 0.08362645760052266, "grad_norm": 0.12086167186498642, "learning_rate": 1e-06, "loss": 0.0477, "step": 896 }, { "clip_ratio/high_max": 0.0024008142354432493, "clip_ratio/high_mean": 0.0010434840241941856, "clip_ratio/low_mean": 0.0011500921591505175, "clip_ratio/low_min": 3.950086284021381e-05, "clip_ratio/region_mean": 0.0021935761396889575, "epoch": 0.08371979070052325, "grad_norm": 1498173.625, "learning_rate": 1e-06, "loss": 63.8014, "step": 897 }, { "clip_ratio/high_max": 0.0023613000084878877, "clip_ratio/high_mean": 0.0010318738313799258, "clip_ratio/low_mean": 0.0011949530489800964, "clip_ratio/low_min": 7.660968276468338e-05, "clip_ratio/region_mean": 0.0022268268730840646, "epoch": 0.08381312380052383, "grad_norm": 0.11082210391759872, "learning_rate": 1e-06, "loss": 0.0482, "step": 898 }, { "clip_ratio/high_max": 0.0024838549143169075, "clip_ratio/high_mean": 0.0010703700281737838, "clip_ratio/low_mean": 0.0010024417460954282, "clip_ratio/low_min": 9.429518922843272e-05, "clip_ratio/region_mean": 0.002072811759717297, "epoch": 0.08390645690052441, "grad_norm": 0.09950433671474457, "learning_rate": 1e-06, "loss": 0.0036, "step": 899 }, { "clip_ratio/high_max": 0.002268324842589209, "clip_ratio/high_mean": 0.0008833024767227471, "clip_ratio/low_mean": 0.0014224285587260965, "clip_ratio/low_min": 5.858836902916664e-05, "clip_ratio/region_mean": 0.0023057309954310767, "epoch": 0.083999790000525, "grad_norm": 33.7497673034668, "learning_rate": 1e-06, "loss": 0.0399, "step": 900 }, { "clip_ratio/high_max": 0.0023017935891402885, "clip_ratio/high_mean": 0.0009715641572256573, "clip_ratio/low_mean": 0.0010959513783745933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002067515510134399, "epoch": 0.08409312310052558, "grad_norm": 0.11014106124639511, "learning_rate": 1e-06, "loss": -0.0124, "step": 901 }, { "clip_ratio/high_max": 0.0023868624994065613, "clip_ratio/high_mean": 0.0010377027647336945, "clip_ratio/low_mean": 0.0012187745851406362, "clip_ratio/low_min": 9.586280702933436e-05, "clip_ratio/region_mean": 0.0022564773389603943, "epoch": 0.08418645620052617, "grad_norm": 14.505834579467773, "learning_rate": 1e-06, "loss": 0.0214, "step": 902 }, { "clip_ratio/high_max": 0.002561476501796278, "clip_ratio/high_mean": 0.0009719652730382222, "clip_ratio/low_mean": 0.0014163736377668101, "clip_ratio/low_min": 0.00020562474037433276, "clip_ratio/region_mean": 0.0023883389294496737, "epoch": 0.08427978930052675, "grad_norm": 0.2170296460390091, "learning_rate": 1e-06, "loss": 0.0711, "step": 903 }, { "clip_ratio/high_max": 0.0026172805883106776, "clip_ratio/high_mean": 0.0010360503692936618, "clip_ratio/low_mean": 0.0015687572195020039, "clip_ratio/low_min": 0.00010546327393967658, "clip_ratio/region_mean": 0.0026048076106235385, "epoch": 0.08437312240052733, "grad_norm": 0.1139143705368042, "learning_rate": 1e-06, "loss": 0.0434, "step": 904 }, { "clip_ratio/high_max": 0.002303227629454341, "clip_ratio/high_mean": 0.0008711810296517797, "clip_ratio/low_mean": 0.0012094752910343232, "clip_ratio/low_min": 0.00011234086014155764, "clip_ratio/region_mean": 0.002080656311591156, "epoch": 0.08446645550052792, "grad_norm": 0.11276731640100479, "learning_rate": 1e-06, "loss": 0.0464, "step": 905 }, { "clip_ratio/high_max": 0.0022801099403295666, "clip_ratio/high_mean": 0.000999399217107566, "clip_ratio/low_mean": 0.001317723417741945, "clip_ratio/low_min": 0.00016726861576898955, "clip_ratio/region_mean": 0.0023171226785052568, "epoch": 0.0845597886005285, "grad_norm": 0.11814355850219727, "learning_rate": 1e-06, "loss": 0.0196, "step": 906 }, { "clip_ratio/high_max": 0.0023774189176037908, "clip_ratio/high_mean": 0.0010647673479979858, "clip_ratio/low_mean": 0.0011631750385276973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022279423428699374, "epoch": 0.08465312170052908, "grad_norm": 0.11206215620040894, "learning_rate": 1e-06, "loss": -0.0277, "step": 907 }, { "clip_ratio/high_max": 0.0019795977241301443, "clip_ratio/high_mean": 0.0008602875859651249, "clip_ratio/low_mean": 0.0011299404268356739, "clip_ratio/low_min": 9.358750321553089e-05, "clip_ratio/region_mean": 0.00199022804008564, "epoch": 0.08474645480052967, "grad_norm": 0.10877736657857895, "learning_rate": 1e-06, "loss": 0.0172, "step": 908 }, { "clip_ratio/high_max": 0.002549438962887507, "clip_ratio/high_mean": 0.0010372703109169379, "clip_ratio/low_mean": 0.0015779779023432638, "clip_ratio/low_min": 3.47672903444618e-05, "clip_ratio/region_mean": 0.0026152482532779686, "epoch": 0.08483978790053025, "grad_norm": 0.16966469585895538, "learning_rate": 1e-06, "loss": 0.0128, "step": 909 }, { "clip_ratio/high_max": 0.002389495442912448, "clip_ratio/high_mean": 0.0009297276428696932, "clip_ratio/low_mean": 0.0012547899823402986, "clip_ratio/low_min": 0.0001997924155148212, "clip_ratio/region_mean": 0.0021845176015631296, "epoch": 0.08493312100053083, "grad_norm": 0.10381964594125748, "learning_rate": 1e-06, "loss": 0.0097, "step": 910 }, { "clip_ratio/high_max": 0.0023043394103297032, "clip_ratio/high_mean": 0.0010334826147300191, "clip_ratio/low_mean": 0.0011366517528585973, "clip_ratio/low_min": 0.00013663454137713416, "clip_ratio/region_mean": 0.002170134444895666, "epoch": 0.08502645410053142, "grad_norm": 0.11094743013381958, "learning_rate": 1e-06, "loss": -0.0152, "step": 911 }, { "clip_ratio/high_max": 0.002263615173433209, "clip_ratio/high_mean": 0.0009786826885829214, "clip_ratio/low_mean": 0.001256348070455715, "clip_ratio/low_min": 6.04115666646976e-05, "clip_ratio/region_mean": 0.002235030733572785, "epoch": 0.085119787200532, "grad_norm": 0.3648679554462433, "learning_rate": 1e-06, "loss": 0.0285, "step": 912 }, { "clip_ratio/high_max": 0.002099015509884339, "clip_ratio/high_mean": 0.0008645005109428894, "clip_ratio/low_mean": 0.0014571586689271498, "clip_ratio/low_min": 6.089243834139779e-05, "clip_ratio/region_mean": 0.002321659201697912, "epoch": 0.08521312030053259, "grad_norm": 0.17229481041431427, "learning_rate": 1e-06, "loss": 0.0345, "step": 913 }, { "clip_ratio/high_max": 0.0026263648542226292, "clip_ratio/high_mean": 0.0010387585789430887, "clip_ratio/low_mean": 0.0014933757956896443, "clip_ratio/low_min": 0.00016493399198225234, "clip_ratio/region_mean": 0.0025321344364783727, "epoch": 0.08530645340053317, "grad_norm": 0.16887377202510834, "learning_rate": 1e-06, "loss": 0.0099, "step": 914 }, { "clip_ratio/high_max": 0.002455261277646059, "clip_ratio/high_mean": 0.0009095840850932291, "clip_ratio/low_mean": 0.0013396572103374638, "clip_ratio/low_min": 0.00018635055130289402, "clip_ratio/region_mean": 0.0022492413118015975, "epoch": 0.08539978650053374, "grad_norm": 0.21369841694831848, "learning_rate": 1e-06, "loss": 0.0314, "step": 915 }, { "clip_ratio/high_max": 0.002347531550185522, "clip_ratio/high_mean": 0.000951391493799747, "clip_ratio/low_mean": 0.0013145883413017145, "clip_ratio/low_min": 0.00027512239284988027, "clip_ratio/region_mean": 0.0022659798560198396, "epoch": 0.08549311960053434, "grad_norm": 0.12547805905342102, "learning_rate": 1e-06, "loss": 0.0364, "step": 916 }, { "clip_ratio/high_max": 0.0024531892631785013, "clip_ratio/high_mean": 0.0010786664515762823, "clip_ratio/low_mean": 0.001318588243520935, "clip_ratio/low_min": 7.283358354470693e-05, "clip_ratio/region_mean": 0.002397254698735196, "epoch": 0.08558645270053492, "grad_norm": 0.31122174859046936, "learning_rate": 1e-06, "loss": 0.022, "step": 917 }, { "clip_ratio/high_max": 0.0021741417513112538, "clip_ratio/high_mean": 0.0008625397240393795, "clip_ratio/low_mean": 0.0013882866951462347, "clip_ratio/low_min": 0.00020721524924738333, "clip_ratio/region_mean": 0.0022508264155476354, "epoch": 0.0856797858005355, "grad_norm": 14.696321487426758, "learning_rate": 1e-06, "loss": 0.067, "step": 918 }, { "clip_ratio/high_max": 0.0029387307731667534, "clip_ratio/high_mean": 0.0011057026604248676, "clip_ratio/low_mean": 0.0010671899253793526, "clip_ratio/low_min": 8.397746842092602e-05, "clip_ratio/region_mean": 0.002172892571252305, "epoch": 0.08577311890053609, "grad_norm": 0.11167183518409729, "learning_rate": 1e-06, "loss": 0.0048, "step": 919 }, { "clip_ratio/high_max": 0.0026019670185633004, "clip_ratio/high_mean": 0.0009697596506157424, "clip_ratio/low_mean": 0.0013117019971105037, "clip_ratio/low_min": 9.068149483937304e-05, "clip_ratio/region_mean": 0.002281461660459172, "epoch": 0.08586645200053666, "grad_norm": 0.10400262475013733, "learning_rate": 1e-06, "loss": 0.0267, "step": 920 }, { "clip_ratio/high_max": 0.002282887253386434, "clip_ratio/high_mean": 0.0010124723266926594, "clip_ratio/low_mean": 0.0010988165413436946, "clip_ratio/low_min": 8.7904143583728e-05, "clip_ratio/region_mean": 0.002111288864398375, "epoch": 0.08595978510053724, "grad_norm": 0.12238967418670654, "learning_rate": 1e-06, "loss": 0.0104, "step": 921 }, { "clip_ratio/high_max": 0.0025052935670828447, "clip_ratio/high_mean": 0.0009398449365107808, "clip_ratio/low_mean": 0.0011888236840604804, "clip_ratio/low_min": 7.10683179931948e-05, "clip_ratio/region_mean": 0.002128668587829452, "epoch": 0.08605311820053783, "grad_norm": 0.2997726500034332, "learning_rate": 1e-06, "loss": 0.0306, "step": 922 }, { "clip_ratio/high_max": 0.0023128594511945266, "clip_ratio/high_mean": 0.0009728028544486733, "clip_ratio/low_mean": 0.0013744936695729848, "clip_ratio/low_min": 0.00013301632316142786, "clip_ratio/region_mean": 0.0023472965112887323, "epoch": 0.08614645130053841, "grad_norm": 0.11684460937976837, "learning_rate": 1e-06, "loss": 0.0267, "step": 923 }, { "clip_ratio/high_max": 0.002228422265034169, "clip_ratio/high_mean": 0.0007883524613134796, "clip_ratio/low_mean": 0.0013270641284179874, "clip_ratio/low_min": 0.00013172165472497, "clip_ratio/region_mean": 0.0021154166097403504, "epoch": 0.086239784400539, "grad_norm": 0.1094125509262085, "learning_rate": 1e-06, "loss": 0.0322, "step": 924 }, { "clip_ratio/high_max": 0.0022775792931497563, "clip_ratio/high_mean": 0.0008262887890850834, "clip_ratio/low_mean": 0.0013585323904408142, "clip_ratio/low_min": 0.0001292492997890804, "clip_ratio/region_mean": 0.0021848212054464966, "epoch": 0.08633311750053958, "grad_norm": 0.21094855666160583, "learning_rate": 1e-06, "loss": 0.0853, "step": 925 }, { "clip_ratio/high_max": 0.0025841073838819284, "clip_ratio/high_mean": 0.0010466866369824857, "clip_ratio/low_mean": 0.0009866291911748704, "clip_ratio/low_min": 5.644580778607633e-05, "clip_ratio/region_mean": 0.002033315809967462, "epoch": 0.08642645060054016, "grad_norm": 0.11110901832580566, "learning_rate": 1e-06, "loss": -0.0147, "step": 926 }, { "clip_ratio/high_max": 0.002120171455317177, "clip_ratio/high_mean": 0.000920732965823845, "clip_ratio/low_mean": 0.0011272655938228127, "clip_ratio/low_min": 7.595165152451955e-05, "clip_ratio/region_mean": 0.0020479985105339438, "epoch": 0.08651978370054075, "grad_norm": 0.3409970998764038, "learning_rate": 1e-06, "loss": 0.0258, "step": 927 }, { "clip_ratio/high_max": 0.002264707873109728, "clip_ratio/high_mean": 0.0009114696367760189, "clip_ratio/low_mean": 0.0011135901513625868, "clip_ratio/low_min": 8.535096822015475e-05, "clip_ratio/region_mean": 0.0020250597735866904, "epoch": 0.08661311680054133, "grad_norm": 0.11092744022607803, "learning_rate": 1e-06, "loss": 0.0072, "step": 928 }, { "clip_ratio/high_max": 0.0029259155489853583, "clip_ratio/high_mean": 0.0011901314464921597, "clip_ratio/low_mean": 0.0010359873558627442, "clip_ratio/low_min": 5.0713119435386034e-05, "clip_ratio/region_mean": 0.002226118740509264, "completions/clipped_ratio": 0.012433733258928603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 606.3265991210938, "completions/mean_terminated_length": 562.3905639648438, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.08670644990054191, "grad_norm": 658.1107788085938, "learning_rate": 1e-06, "loss": 920.7662, "num_tokens": 732468601.0, "reward": 0.581856906414032, "reward_std": 0.19840924441814423, "rewards/simpleverify_reward/mean": 0.5818568468093872, "rewards/simpleverify_reward/std": 0.4932560920715332, "step": 929 }, { "clip_ratio/high_max": 0.0034058319142786786, "clip_ratio/high_mean": 0.0011287498200545087, "clip_ratio/low_mean": 0.0011267033260082826, "clip_ratio/low_min": 0.00014275316425482742, "clip_ratio/region_mean": 0.002255453153338749, "epoch": 0.0867997830005425, "grad_norm": 80630872.0, "learning_rate": 1e-06, "loss": 15628.332, "step": 930 }, { "clip_ratio/high_max": 0.002698423617403023, "clip_ratio/high_mean": 0.0011878734112542588, "clip_ratio/low_mean": 0.0009699943511805031, "clip_ratio/low_min": 5.638257516693557e-05, "clip_ratio/region_mean": 0.0021578677478828467, "epoch": 0.08689311610054308, "grad_norm": 1000.2466430664062, "learning_rate": 1e-06, "loss": 3.1991, "step": 931 }, { "clip_ratio/high_max": 0.0027802466865978204, "clip_ratio/high_mean": 0.0012451053080440033, "clip_ratio/low_mean": 0.000949066143220989, "clip_ratio/low_min": 7.313903097383445e-05, "clip_ratio/region_mean": 0.0021941714658169076, "epoch": 0.08698644920054366, "grad_norm": 3595.9853515625, "learning_rate": 1e-06, "loss": 12.6799, "step": 932 }, { "clip_ratio/high_max": 0.0028793093442800455, "clip_ratio/high_mean": 0.001297881508435239, "clip_ratio/low_mean": 0.0010068388110084925, "clip_ratio/low_min": 0.00019474184227874503, "clip_ratio/region_mean": 0.002304720335814636, "epoch": 0.08707978230054425, "grad_norm": 39064.96875, "learning_rate": 1e-06, "loss": 6.4856, "step": 933 }, { "clip_ratio/high_max": 0.002898460501455702, "clip_ratio/high_mean": 0.0012248599632584956, "clip_ratio/low_mean": 0.0010910550081462134, "clip_ratio/low_min": 0.0001481485724070808, "clip_ratio/region_mean": 0.0023159149641287513, "epoch": 0.08717311540054483, "grad_norm": 144.14561462402344, "learning_rate": 1e-06, "loss": 0.0765, "step": 934 }, { "clip_ratio/high_max": 0.002663971608853899, "clip_ratio/high_mean": 0.001215269701788202, "clip_ratio/low_mean": 0.001067837296432117, "clip_ratio/low_min": 0.00011083995923399925, "clip_ratio/region_mean": 0.002283106980030425, "epoch": 0.08726644850054542, "grad_norm": 1607.0867919921875, "learning_rate": 1e-06, "loss": 16.6825, "step": 935 }, { "clip_ratio/high_max": 0.0027065208123531193, "clip_ratio/high_mean": 0.001296229129366111, "clip_ratio/low_mean": 0.0010284048130415613, "clip_ratio/low_min": 0.00011034236740670167, "clip_ratio/region_mean": 0.0023246338751050644, "epoch": 0.087359781600546, "grad_norm": 10.217474937438965, "learning_rate": 1e-06, "loss": -0.0226, "step": 936 }, { "clip_ratio/high_max": 0.002616501340526156, "clip_ratio/high_mean": 0.001097550337362918, "clip_ratio/low_mean": 0.0013795242011838127, "clip_ratio/low_min": 0.00017977006882574642, "clip_ratio/region_mean": 0.002477074544003699, "epoch": 0.08745311470054658, "grad_norm": 25.305007934570312, "learning_rate": 1e-06, "loss": 0.0776, "step": 937 }, { "clip_ratio/high_max": 0.002169427643821109, "clip_ratio/high_mean": 0.0009409247904841322, "clip_ratio/low_mean": 0.0011030634632334113, "clip_ratio/low_min": 5.6443348512402736e-05, "clip_ratio/region_mean": 0.002043988242803607, "epoch": 0.08754644780054717, "grad_norm": 3.802832841873169, "learning_rate": 1e-06, "loss": 0.061, "step": 938 }, { "clip_ratio/high_max": 0.0026284974301233888, "clip_ratio/high_mean": 0.0010768719366751611, "clip_ratio/low_mean": 0.0011288458008493762, "clip_ratio/low_min": 6.795461376896128e-05, "clip_ratio/region_mean": 0.002205717726610601, "epoch": 0.08763978090054775, "grad_norm": 0.12048052251338959, "learning_rate": 1e-06, "loss": 0.0127, "step": 939 }, { "clip_ratio/high_max": 0.0027050889329984784, "clip_ratio/high_mean": 0.0011865939704875927, "clip_ratio/low_mean": 0.0011583276555029443, "clip_ratio/low_min": 0.00012447621702449396, "clip_ratio/region_mean": 0.00234492161689559, "epoch": 0.08773311400054833, "grad_norm": 0.7302131652832031, "learning_rate": 1e-06, "loss": -0.0277, "step": 940 }, { "clip_ratio/high_max": 0.002358895471843425, "clip_ratio/high_mean": 0.0009820188606681768, "clip_ratio/low_mean": 0.0014514831309497822, "clip_ratio/low_min": 1.2700670595222618e-05, "clip_ratio/region_mean": 0.0024335019843420014, "epoch": 0.08782644710054892, "grad_norm": 0.5652409791946411, "learning_rate": 1e-06, "loss": 0.0729, "step": 941 }, { "clip_ratio/high_max": 0.0026311205874662846, "clip_ratio/high_mean": 0.0011160112917423248, "clip_ratio/low_mean": 0.0015240918219205923, "clip_ratio/low_min": 0.0001546228668303229, "clip_ratio/region_mean": 0.002640103062731214, "epoch": 0.0879197802005495, "grad_norm": 1.129449725151062, "learning_rate": 1e-06, "loss": 0.0362, "step": 942 }, { "clip_ratio/high_max": 0.0025298894397565164, "clip_ratio/high_mean": 0.0011450928814156214, "clip_ratio/low_mean": 0.0016939030574576464, "clip_ratio/low_min": 0.00011486561743367929, "clip_ratio/region_mean": 0.0028389959115884267, "epoch": 0.08801311330055009, "grad_norm": 58.09937286376953, "learning_rate": 1e-06, "loss": 0.0677, "step": 943 }, { "clip_ratio/high_max": 0.002646366374392528, "clip_ratio/high_mean": 0.0011582256483961828, "clip_ratio/low_mean": 0.0017861177475424483, "clip_ratio/low_min": 0.000292622606139048, "clip_ratio/region_mean": 0.002944343454146292, "epoch": 0.08810644640055067, "grad_norm": 1.307054042816162, "learning_rate": 1e-06, "loss": 0.0508, "step": 944 }, { "clip_ratio/high_max": 0.0027794577763415873, "clip_ratio/high_mean": 0.0011400759758544154, "clip_ratio/low_mean": 0.0017639938741922379, "clip_ratio/low_min": 1.544735459901858e-05, "clip_ratio/region_mean": 0.0029040698427706957, "epoch": 0.08819977950055125, "grad_norm": 0.4146556854248047, "learning_rate": 1e-06, "loss": 0.021, "step": 945 }, { "clip_ratio/high_max": 0.002447194310661871, "clip_ratio/high_mean": 0.0011346678329573479, "clip_ratio/low_mean": 0.0019092099682893604, "clip_ratio/low_min": 0.0003389314042578917, "clip_ratio/region_mean": 0.0030438778194366023, "epoch": 0.08829311260055184, "grad_norm": 0.1193733960390091, "learning_rate": 1e-06, "loss": 0.0492, "step": 946 }, { "clip_ratio/high_max": 0.0026018462594947778, "clip_ratio/high_mean": 0.0010851564475160558, "clip_ratio/low_mean": 0.0016620873211650178, "clip_ratio/low_min": 7.24562141840579e-05, "clip_ratio/region_mean": 0.0027472437723190524, "epoch": 0.08838644570055242, "grad_norm": 0.15501439571380615, "learning_rate": 1e-06, "loss": 0.0134, "step": 947 }, { "clip_ratio/high_max": 0.002631387527799234, "clip_ratio/high_mean": 0.0012107435322832316, "clip_ratio/low_mean": 0.0014972333374316804, "clip_ratio/low_min": 0.00011391548332539969, "clip_ratio/region_mean": 0.002707976855162997, "epoch": 0.088479778800553, "grad_norm": 0.13161900639533997, "learning_rate": 1e-06, "loss": 0.0132, "step": 948 }, { "clip_ratio/high_max": 0.0028324291270109825, "clip_ratio/high_mean": 0.0010979366124956869, "clip_ratio/low_mean": 0.002003681380301714, "clip_ratio/low_min": 0.00025417295364604797, "clip_ratio/region_mean": 0.003101617985521443, "epoch": 0.08857311190055359, "grad_norm": 18580.810546875, "learning_rate": 1e-06, "loss": 2.7264, "step": 949 }, { "clip_ratio/high_max": 0.0023946603178046644, "clip_ratio/high_mean": 0.0010583803159534, "clip_ratio/low_mean": 0.0015508402575505897, "clip_ratio/low_min": 8.250276187027339e-05, "clip_ratio/region_mean": 0.002609220566228032, "epoch": 0.08866644500055416, "grad_norm": 0.15283074975013733, "learning_rate": 1e-06, "loss": 0.0191, "step": 950 }, { "clip_ratio/high_max": 0.0025217513102688827, "clip_ratio/high_mean": 0.0009943358272721525, "clip_ratio/low_mean": 0.0014997832622611895, "clip_ratio/low_min": 0.00019928392430301756, "clip_ratio/region_mean": 0.002494119071343448, "epoch": 0.08875977810055474, "grad_norm": 0.10834085941314697, "learning_rate": 1e-06, "loss": 0.0395, "step": 951 }, { "clip_ratio/high_max": 0.002701760757190641, "clip_ratio/high_mean": 0.0011713325293385424, "clip_ratio/low_mean": 0.002080030579236336, "clip_ratio/low_min": 0.00021892567792747286, "clip_ratio/region_mean": 0.003251363188610412, "epoch": 0.08885311120055533, "grad_norm": 0.4292258322238922, "learning_rate": 1e-06, "loss": 0.0409, "step": 952 }, { "clip_ratio/high_max": 0.003101715679804329, "clip_ratio/high_mean": 0.0012029106983391102, "clip_ratio/low_mean": 0.0015936565723677631, "clip_ratio/low_min": 0.00010612279402266722, "clip_ratio/region_mean": 0.0027965671979472972, "epoch": 0.08894644430055591, "grad_norm": 0.1338549107313156, "learning_rate": 1e-06, "loss": 0.0263, "step": 953 }, { "clip_ratio/high_max": 0.0030695839086547494, "clip_ratio/high_mean": 0.0011215854592592223, "clip_ratio/low_mean": 0.0015305491069739219, "clip_ratio/low_min": 6.853561990283197e-05, "clip_ratio/region_mean": 0.00265213457896607, "epoch": 0.0890397774005565, "grad_norm": 0.12780126929283142, "learning_rate": 1e-06, "loss": 0.0094, "step": 954 }, { "clip_ratio/high_max": 0.00236971069170977, "clip_ratio/high_mean": 0.0010469768167240545, "clip_ratio/low_mean": 0.0014215620467439294, "clip_ratio/low_min": 0.00011546862333489116, "clip_ratio/region_mean": 0.0024685388925718144, "epoch": 0.08913311050055708, "grad_norm": 0.1021844893693924, "learning_rate": 1e-06, "loss": 0.0311, "step": 955 }, { "clip_ratio/high_max": 0.0025919151084963232, "clip_ratio/high_mean": 0.001123786694734008, "clip_ratio/low_mean": 0.001994975758861983, "clip_ratio/low_min": 0.00014724188258696813, "clip_ratio/region_mean": 0.0031187623972073197, "epoch": 0.08922644360055766, "grad_norm": 1233.413330078125, "learning_rate": 1e-06, "loss": 0.1991, "step": 956 }, { "clip_ratio/high_max": 0.002833966318576131, "clip_ratio/high_mean": 0.00111185597597796, "clip_ratio/low_mean": 0.0018087287826347165, "clip_ratio/low_min": 0.00017405671223968966, "clip_ratio/region_mean": 0.0029205848113633692, "epoch": 0.08931977670055825, "grad_norm": 0.10991702228784561, "learning_rate": 1e-06, "loss": 0.0416, "step": 957 }, { "clip_ratio/high_max": 0.0027754683178500272, "clip_ratio/high_mean": 0.0012341040346655063, "clip_ratio/low_mean": 0.001622908308490878, "clip_ratio/low_min": 7.870357512729242e-05, "clip_ratio/region_mean": 0.002857012426829897, "epoch": 0.08941310980055883, "grad_norm": 48.008544921875, "learning_rate": 1e-06, "loss": 0.0172, "step": 958 }, { "clip_ratio/high_max": 0.0023099228856153786, "clip_ratio/high_mean": 0.0010256685272906907, "clip_ratio/low_mean": 0.001928425299411174, "clip_ratio/low_min": 0.00013614695126307197, "clip_ratio/region_mean": 0.0029540938994614407, "epoch": 0.08950644290055941, "grad_norm": 0.15738925337791443, "learning_rate": 1e-06, "loss": 0.0717, "step": 959 }, { "clip_ratio/high_max": 0.002546827479818603, "clip_ratio/high_mean": 0.0012053147747792536, "clip_ratio/low_mean": 0.001494118641858222, "clip_ratio/low_min": 0.00012901766058348585, "clip_ratio/region_mean": 0.002699433476664126, "epoch": 0.08959977600056, "grad_norm": 0.12766140699386597, "learning_rate": 1e-06, "loss": -0.0442, "step": 960 }, { "clip_ratio/high_max": 0.002865133195882663, "clip_ratio/high_mean": 0.0012718562138616107, "clip_ratio/low_mean": 0.001514236930233892, "clip_ratio/low_min": 3.713743444677675e-05, "clip_ratio/region_mean": 0.0027860930567840114, "epoch": 0.08969310910056058, "grad_norm": 0.1301388293504715, "learning_rate": 1e-06, "loss": -0.0345, "step": 961 }, { "clip_ratio/high_max": 0.0029480006778612733, "clip_ratio/high_mean": 0.0012709887923847418, "clip_ratio/low_mean": 0.0014145162604108918, "clip_ratio/low_min": 5.115070234751329e-05, "clip_ratio/region_mean": 0.00268550512555521, "epoch": 0.08978644220056116, "grad_norm": 0.11750379204750061, "learning_rate": 1e-06, "loss": -0.0044, "step": 962 }, { "clip_ratio/high_max": 0.0025278916145907715, "clip_ratio/high_mean": 0.0009999431968026329, "clip_ratio/low_mean": 0.0017290908508584835, "clip_ratio/low_min": 0.000227160857320996, "clip_ratio/region_mean": 0.002729034094954841, "epoch": 0.08987977530056175, "grad_norm": 0.11249005794525146, "learning_rate": 1e-06, "loss": 0.0437, "step": 963 }, { "clip_ratio/high_max": 0.002835528561263345, "clip_ratio/high_mean": 0.0011164538191223983, "clip_ratio/low_mean": 0.0017168393824249506, "clip_ratio/low_min": 7.206099780887598e-05, "clip_ratio/region_mean": 0.0028332932051853277, "epoch": 0.08997310840056233, "grad_norm": 0.11565061658620834, "learning_rate": 1e-06, "loss": 0.0065, "step": 964 }, { "clip_ratio/high_max": 0.0025258780078729615, "clip_ratio/high_mean": 0.001039100403431803, "clip_ratio/low_mean": 0.001595698318851646, "clip_ratio/low_min": 4.252992403053213e-05, "clip_ratio/region_mean": 0.00263479878049111, "epoch": 0.09006644150056292, "grad_norm": 0.6675903797149658, "learning_rate": 1e-06, "loss": 0.0344, "step": 965 }, { "clip_ratio/high_max": 0.0024378097114095, "clip_ratio/high_mean": 0.0010198693780694157, "clip_ratio/low_mean": 0.0014414132929232437, "clip_ratio/low_min": 7.952272335387534e-05, "clip_ratio/region_mean": 0.0024612826455268078, "epoch": 0.0901597746005635, "grad_norm": 0.10590285062789917, "learning_rate": 1e-06, "loss": -0.0029, "step": 966 }, { "clip_ratio/high_max": 0.002850593977200333, "clip_ratio/high_mean": 0.0011228631774429232, "clip_ratio/low_mean": 0.0016011863881431054, "clip_ratio/low_min": 6.342563392536249e-05, "clip_ratio/region_mean": 0.0027240496638114564, "epoch": 0.09025310770056408, "grad_norm": 176.32424926757812, "learning_rate": 1e-06, "loss": 0.0384, "step": 967 }, { "clip_ratio/high_max": 0.0026763011555885896, "clip_ratio/high_mean": 0.0010797035210998729, "clip_ratio/low_mean": 0.0016338121786247939, "clip_ratio/low_min": 0.00021653350222550216, "clip_ratio/region_mean": 0.0027135156487929635, "epoch": 0.09034644080056467, "grad_norm": 0.12173662334680557, "learning_rate": 1e-06, "loss": 0.0706, "step": 968 }, { "clip_ratio/high_max": 0.0023993426584638655, "clip_ratio/high_mean": 0.0011845326589536853, "clip_ratio/low_mean": 0.0016304294622386806, "clip_ratio/low_min": 0.00020578582552843727, "clip_ratio/region_mean": 0.002814962157572154, "epoch": 0.09043977390056525, "grad_norm": 0.11350627988576889, "learning_rate": 1e-06, "loss": 0.0065, "step": 969 }, { "clip_ratio/high_max": 0.0029194879898568615, "clip_ratio/high_mean": 0.0011662225224426948, "clip_ratio/low_mean": 0.0016439703431387898, "clip_ratio/low_min": 0.00023466075435862876, "clip_ratio/region_mean": 0.002810192854667548, "epoch": 0.09053310700056583, "grad_norm": 0.12750056385993958, "learning_rate": 1e-06, "loss": 0.0071, "step": 970 }, { "clip_ratio/high_max": 0.0023622112057637423, "clip_ratio/high_mean": 0.0010675961821107194, "clip_ratio/low_mean": 0.001563400134727999, "clip_ratio/low_min": 0.00018963728234666632, "clip_ratio/region_mean": 0.002630996285006404, "epoch": 0.09062644010056642, "grad_norm": 0.12649251520633698, "learning_rate": 1e-06, "loss": 0.0365, "step": 971 }, { "clip_ratio/high_max": 0.002768394253507722, "clip_ratio/high_mean": 0.0010969074101012666, "clip_ratio/low_mean": 0.0018255294125992805, "clip_ratio/low_min": 0.0001825466893023986, "clip_ratio/region_mean": 0.002922436877270229, "epoch": 0.090719773200567, "grad_norm": 1.3245729207992554, "learning_rate": 1e-06, "loss": 0.0403, "step": 972 }, { "clip_ratio/high_max": 0.0027267038167337887, "clip_ratio/high_mean": 0.0011833486423711292, "clip_ratio/low_mean": 0.00142173962740344, "clip_ratio/low_min": 6.574588041985407e-05, "clip_ratio/region_mean": 0.0026050882806885056, "epoch": 0.09081310630056758, "grad_norm": 0.3616175055503845, "learning_rate": 1e-06, "loss": -0.0108, "step": 973 }, { "clip_ratio/high_max": 0.0025984255189541727, "clip_ratio/high_mean": 0.0011566717439563945, "clip_ratio/low_mean": 0.0014222343779692892, "clip_ratio/low_min": 8.970064664026722e-05, "clip_ratio/region_mean": 0.002578906118287705, "epoch": 0.09090643940056817, "grad_norm": 0.11126550287008286, "learning_rate": 1e-06, "loss": 0.0367, "step": 974 }, { "clip_ratio/high_max": 0.0024302278252434917, "clip_ratio/high_mean": 0.0010615752034937032, "clip_ratio/low_mean": 0.001568183972267434, "clip_ratio/low_min": 0.00013949740286989254, "clip_ratio/region_mean": 0.0026297592266928405, "epoch": 0.09099977250056875, "grad_norm": 0.3150697946548462, "learning_rate": 1e-06, "loss": -0.0113, "step": 975 }, { "clip_ratio/high_max": 0.0028099539122194983, "clip_ratio/high_mean": 0.0011007877983502112, "clip_ratio/low_mean": 0.0017331377748632804, "clip_ratio/low_min": 0.00019785277436312754, "clip_ratio/region_mean": 0.0028339256459730677, "epoch": 0.09109310560056934, "grad_norm": 0.5172707438468933, "learning_rate": 1e-06, "loss": 0.0533, "step": 976 }, { "clip_ratio/high_max": 0.0024785328350844793, "clip_ratio/high_mean": 0.000984700021945173, "clip_ratio/low_mean": 0.0014817291230428964, "clip_ratio/low_min": 5.411065740190679e-05, "clip_ratio/region_mean": 0.0024664291049703024, "epoch": 0.09118643870056992, "grad_norm": 0.817656934261322, "learning_rate": 1e-06, "loss": 0.0412, "step": 977 }, { "clip_ratio/high_max": 0.002422324123472208, "clip_ratio/high_mean": 0.0009541405452182516, "clip_ratio/low_mean": 0.0015630449270247482, "clip_ratio/low_min": 0.000106721445263247, "clip_ratio/region_mean": 0.0025171855013468303, "epoch": 0.0912797718005705, "grad_norm": 41.40570831298828, "learning_rate": 1e-06, "loss": 0.0888, "step": 978 }, { "clip_ratio/high_max": 0.0018276834380230866, "clip_ratio/high_mean": 0.0008153427370416466, "clip_ratio/low_mean": 0.0015795414037711453, "clip_ratio/low_min": 0.00015140006962610641, "clip_ratio/region_mean": 0.0023948841626406647, "epoch": 0.09137310490057109, "grad_norm": 0.1352696567773819, "learning_rate": 1e-06, "loss": 0.1003, "step": 979 }, { "clip_ratio/high_max": 0.00261139221402118, "clip_ratio/high_mean": 0.0010639927131705917, "clip_ratio/low_mean": 0.0012667731461988296, "clip_ratio/low_min": 6.958876019780291e-05, "clip_ratio/region_mean": 0.002330765884835273, "epoch": 0.09146643800057166, "grad_norm": 0.22210972011089325, "learning_rate": 1e-06, "loss": 0.0286, "step": 980 }, { "clip_ratio/high_max": 0.002344028114748653, "clip_ratio/high_mean": 0.0011078451334469719, "clip_ratio/low_mean": 0.0013005328000872396, "clip_ratio/low_min": 0.0002134958303940948, "clip_ratio/region_mean": 0.0024083779571810737, "epoch": 0.09155977110057224, "grad_norm": 0.290182888507843, "learning_rate": 1e-06, "loss": 0.024, "step": 981 }, { "clip_ratio/high_max": 0.002362330043979455, "clip_ratio/high_mean": 0.0010354335499869194, "clip_ratio/low_mean": 0.0015081310884852428, "clip_ratio/low_min": 3.981289410148747e-05, "clip_ratio/region_mean": 0.0025435646675759926, "epoch": 0.09165310420057284, "grad_norm": 0.11489500850439072, "learning_rate": 1e-06, "loss": 0.0073, "step": 982 }, { "clip_ratio/high_max": 0.0024011111963773146, "clip_ratio/high_mean": 0.0010692693904275075, "clip_ratio/low_mean": 0.0012818294126191176, "clip_ratio/low_min": 8.829537273413735e-05, "clip_ratio/region_mean": 0.0023510988685302436, "epoch": 0.09174643730057341, "grad_norm": 0.8046979904174805, "learning_rate": 1e-06, "loss": -0.0101, "step": 983 }, { "clip_ratio/high_max": 0.0024106841083266772, "clip_ratio/high_mean": 0.0011014197516487911, "clip_ratio/low_mean": 0.0015164341930358205, "clip_ratio/low_min": 0.00013213888951213448, "clip_ratio/region_mean": 0.0026178538682870567, "epoch": 0.09183977040057399, "grad_norm": 0.998933732509613, "learning_rate": 1e-06, "loss": 0.0584, "step": 984 }, { "clip_ratio/high_max": 0.002503044313925784, "clip_ratio/high_mean": 0.0009758497963048285, "clip_ratio/low_mean": 0.0015415022608067375, "clip_ratio/low_min": 0.00011525819900271017, "clip_ratio/region_mean": 0.002517351989808958, "epoch": 0.09193310350057458, "grad_norm": 0.12569668889045715, "learning_rate": 1e-06, "loss": 0.0756, "step": 985 }, { "clip_ratio/high_max": 0.002325701185327489, "clip_ratio/high_mean": 0.0010083155830216128, "clip_ratio/low_mean": 0.0014818382514931727, "clip_ratio/low_min": 0.00012115579193050507, "clip_ratio/region_mean": 0.0024901538345147856, "epoch": 0.09202643660057516, "grad_norm": 0.1126972883939743, "learning_rate": 1e-06, "loss": 0.0079, "step": 986 }, { "clip_ratio/high_max": 0.002616191588458605, "clip_ratio/high_mean": 0.0010832682146428851, "clip_ratio/low_mean": 0.001513116068963427, "clip_ratio/low_min": 0.00022573885507881641, "clip_ratio/region_mean": 0.00259638435090892, "epoch": 0.09211976970057575, "grad_norm": 0.6002849340438843, "learning_rate": 1e-06, "loss": 0.0282, "step": 987 }, { "clip_ratio/high_max": 0.002155070767912548, "clip_ratio/high_mean": 0.0009797469447221374, "clip_ratio/low_mean": 0.00128266272804467, "clip_ratio/low_min": 3.6421747608983424e-05, "clip_ratio/region_mean": 0.002262409638206009, "epoch": 0.09221310280057633, "grad_norm": 0.31428274512290955, "learning_rate": 1e-06, "loss": 0.016, "step": 988 }, { "clip_ratio/high_max": 0.0022386796335922554, "clip_ratio/high_mean": 0.0009786887821974233, "clip_ratio/low_mean": 0.0012545868448796682, "clip_ratio/low_min": 7.038202784315217e-05, "clip_ratio/region_mean": 0.002233275656180922, "epoch": 0.09230643590057691, "grad_norm": 0.20584438741207123, "learning_rate": 1e-06, "loss": 0.0209, "step": 989 }, { "clip_ratio/high_max": 0.0023859011635067873, "clip_ratio/high_mean": 0.0010055982675112318, "clip_ratio/low_mean": 0.0012287222143640975, "clip_ratio/low_min": 5.5242478993022814e-05, "clip_ratio/region_mean": 0.0022343204545904882, "epoch": 0.0923997690005775, "grad_norm": 0.11135967075824738, "learning_rate": 1e-06, "loss": 0.039, "step": 990 }, { "clip_ratio/high_max": 0.0025239391688955948, "clip_ratio/high_mean": 0.0011136974571854807, "clip_ratio/low_mean": 0.0014366712784976698, "clip_ratio/low_min": 6.92541307216743e-05, "clip_ratio/region_mean": 0.0025503687793388963, "epoch": 0.09249310210057808, "grad_norm": 0.103728286921978, "learning_rate": 1e-06, "loss": 0.0186, "step": 991 }, { "clip_ratio/high_max": 0.002382981878326973, "clip_ratio/high_mean": 0.001031460136800888, "clip_ratio/low_mean": 0.0014163679115881678, "clip_ratio/low_min": 0.00012731249626085628, "clip_ratio/region_mean": 0.0024478280611219816, "epoch": 0.09258643520057866, "grad_norm": 0.24044102430343628, "learning_rate": 1e-06, "loss": 0.0198, "step": 992 }, { "clip_ratio/high_max": 0.0026172061407123692, "clip_ratio/high_mean": 0.0009421429385838564, "clip_ratio/low_mean": 0.001335981534793973, "clip_ratio/low_min": 0.00010563249452388845, "clip_ratio/region_mean": 0.002278124484291766, "epoch": 0.09267976830057925, "grad_norm": 0.10154741257429123, "learning_rate": 1e-06, "loss": 0.007, "step": 993 }, { "clip_ratio/high_max": 0.0028142588707851246, "clip_ratio/high_mean": 0.00115209560317453, "clip_ratio/low_mean": 0.0011584869298530975, "clip_ratio/low_min": 9.399178816238418e-05, "clip_ratio/region_mean": 0.0023105825457605533, "epoch": 0.09277310140057983, "grad_norm": 0.1097271591424942, "learning_rate": 1e-06, "loss": -0.0076, "step": 994 }, { "clip_ratio/high_max": 0.00263357119547436, "clip_ratio/high_mean": 0.0010795430353027768, "clip_ratio/low_mean": 0.0012707334881270071, "clip_ratio/low_min": 0.00012705187509709504, "clip_ratio/region_mean": 0.002350276510696858, "epoch": 0.09286643450058042, "grad_norm": 0.12392475455999374, "learning_rate": 1e-06, "loss": 0.0032, "step": 995 }, { "clip_ratio/high_max": 0.0021766968857264146, "clip_ratio/high_mean": 0.0010299709501850884, "clip_ratio/low_mean": 0.0013107002450851724, "clip_ratio/low_min": 7.586887659272179e-05, "clip_ratio/region_mean": 0.0023406712061841972, "epoch": 0.092959767600581, "grad_norm": 0.1134057343006134, "learning_rate": 1e-06, "loss": 0.0385, "step": 996 }, { "clip_ratio/high_max": 0.002529985169530846, "clip_ratio/high_mean": 0.001061465440216125, "clip_ratio/low_mean": 0.0014184961328282952, "clip_ratio/low_min": 2.5298522814409807e-05, "clip_ratio/region_mean": 0.0024799615057418123, "epoch": 0.09305310070058158, "grad_norm": 1.5392310619354248, "learning_rate": 1e-06, "loss": 0.016, "step": 997 }, { "clip_ratio/high_max": 0.0022402687682188116, "clip_ratio/high_mean": 0.0009063351972145028, "clip_ratio/low_mean": 0.0013951569526398089, "clip_ratio/low_min": 0.00018309052393306047, "clip_ratio/region_mean": 0.0023014921171125025, "epoch": 0.09314643380058217, "grad_norm": 0.2566203474998474, "learning_rate": 1e-06, "loss": 0.0497, "step": 998 }, { "clip_ratio/high_max": 0.0029917960782768205, "clip_ratio/high_mean": 0.0011093155371781904, "clip_ratio/low_mean": 0.0015334379640989937, "clip_ratio/low_min": 0.0001135174334194744, "clip_ratio/region_mean": 0.002642753519467078, "epoch": 0.09323976690058275, "grad_norm": 0.4379431903362274, "learning_rate": 1e-06, "loss": 0.0308, "step": 999 }, { "clip_ratio/high_max": 0.002529861143557355, "clip_ratio/high_mean": 0.0010291887756466167, "clip_ratio/low_mean": 0.0011212038825760828, "clip_ratio/low_min": 7.376048597507179e-05, "clip_ratio/region_mean": 0.0021503927127923816, "epoch": 0.09333310000058333, "grad_norm": 0.10886456072330475, "learning_rate": 1e-06, "loss": 0.0256, "step": 1000 }, { "clip_ratio/high_max": 0.00273510672559496, "clip_ratio/high_mean": 0.0010997926201525843, "clip_ratio/low_mean": 0.0014155381613818463, "clip_ratio/low_min": 0.00010484305948921246, "clip_ratio/region_mean": 0.0025153306996799074, "epoch": 0.09342643310058392, "grad_norm": 43.277496337890625, "learning_rate": 1e-06, "loss": 0.0458, "step": 1001 }, { "clip_ratio/high_max": 0.0024636176676722243, "clip_ratio/high_mean": 0.00110682215745328, "clip_ratio/low_mean": 0.001600837513251463, "clip_ratio/low_min": 0.00012456223976187175, "clip_ratio/region_mean": 0.002707659667066764, "epoch": 0.0935197662005845, "grad_norm": 0.9413765072822571, "learning_rate": 1e-06, "loss": 0.0069, "step": 1002 }, { "clip_ratio/high_max": 0.002407715051958803, "clip_ratio/high_mean": 0.0010900501874857582, "clip_ratio/low_mean": 0.0015839362204133067, "clip_ratio/low_min": 0.00021959385594527703, "clip_ratio/region_mean": 0.002673986404261086, "epoch": 0.09361309930058508, "grad_norm": 0.10675783455371857, "learning_rate": 1e-06, "loss": 0.0268, "step": 1003 }, { "clip_ratio/high_max": 0.002793648098304402, "clip_ratio/high_mean": 0.0010685122288123239, "clip_ratio/low_mean": 0.0017600591927475762, "clip_ratio/low_min": 0.00014434717922995333, "clip_ratio/region_mean": 0.0028285713924560696, "epoch": 0.09370643240058567, "grad_norm": 0.10406289994716644, "learning_rate": 1e-06, "loss": 0.0586, "step": 1004 }, { "clip_ratio/high_max": 0.0024772695105639286, "clip_ratio/high_mean": 0.001065781765646534, "clip_ratio/low_mean": 0.0014227735082386062, "clip_ratio/low_min": 0.00032432557964057196, "clip_ratio/region_mean": 0.0024885552484192885, "epoch": 0.09379976550058625, "grad_norm": 0.11525371670722961, "learning_rate": 1e-06, "loss": 0.0469, "step": 1005 }, { "clip_ratio/high_max": 0.0025516322639305145, "clip_ratio/high_mean": 0.0011065759535995312, "clip_ratio/low_mean": 0.001271146484214114, "clip_ratio/low_min": 0.00014019234367879108, "clip_ratio/region_mean": 0.002377722419623751, "epoch": 0.09389309860058684, "grad_norm": 0.11251242458820343, "learning_rate": 1e-06, "loss": -0.0035, "step": 1006 }, { "clip_ratio/high_max": 0.002637171965034213, "clip_ratio/high_mean": 0.0011244721918046707, "clip_ratio/low_mean": 0.0015851949101488572, "clip_ratio/low_min": 0.0001297859453188721, "clip_ratio/region_mean": 0.0027096670819446445, "epoch": 0.09398643170058742, "grad_norm": 0.15932558476924896, "learning_rate": 1e-06, "loss": 0.0122, "step": 1007 }, { "clip_ratio/high_max": 0.0025412279428564943, "clip_ratio/high_mean": 0.001115857761760708, "clip_ratio/low_mean": 0.001457088717870647, "clip_ratio/low_min": 5.219623926677741e-05, "clip_ratio/region_mean": 0.0025729465050972067, "epoch": 0.094079764800588, "grad_norm": 0.1427977830171585, "learning_rate": 1e-06, "loss": 0.0375, "step": 1008 }, { "clip_ratio/high_max": 0.0024408898389083333, "clip_ratio/high_mean": 0.0010944160057988483, "clip_ratio/low_mean": 0.0013344156905077398, "clip_ratio/low_min": 8.224411249102559e-05, "clip_ratio/region_mean": 0.0024288316490128636, "epoch": 0.09417309790058859, "grad_norm": 0.12036091089248657, "learning_rate": 1e-06, "loss": -0.0247, "step": 1009 }, { "clip_ratio/high_max": 0.0027292422091704793, "clip_ratio/high_mean": 0.0012204352824483067, "clip_ratio/low_mean": 0.001409094922564691, "clip_ratio/low_min": 0.00027870587655343115, "clip_ratio/region_mean": 0.0026295301868231036, "epoch": 0.09426643100058917, "grad_norm": 1.0272916555404663, "learning_rate": 1e-06, "loss": -0.0266, "step": 1010 }, { "clip_ratio/high_max": 0.0021359871607273817, "clip_ratio/high_mean": 0.0009000364843814168, "clip_ratio/low_mean": 0.0014012872234161478, "clip_ratio/low_min": 7.746000665065367e-05, "clip_ratio/region_mean": 0.002301323736901395, "epoch": 0.09435976410058974, "grad_norm": 0.10471700131893158, "learning_rate": 1e-06, "loss": 0.0078, "step": 1011 }, { "clip_ratio/high_max": 0.002750959429249633, "clip_ratio/high_mean": 0.0010999839396390598, "clip_ratio/low_mean": 0.0016230285182246007, "clip_ratio/low_min": 4.658384932554327e-05, "clip_ratio/region_mean": 0.002723012526985258, "epoch": 0.09445309720059034, "grad_norm": 0.15478749573230743, "learning_rate": 1e-06, "loss": 0.0206, "step": 1012 }, { "clip_ratio/high_max": 0.0022105910247773863, "clip_ratio/high_mean": 0.0009340969882032368, "clip_ratio/low_mean": 0.0017193727508129086, "clip_ratio/low_min": 0.00027211768792767543, "clip_ratio/region_mean": 0.0026534697244642302, "epoch": 0.09454643030059091, "grad_norm": 0.21624431014060974, "learning_rate": 1e-06, "loss": 0.0804, "step": 1013 }, { "clip_ratio/high_max": 0.002289614134497242, "clip_ratio/high_mean": 0.00099690524439211, "clip_ratio/low_mean": 0.0016533989928575465, "clip_ratio/low_min": 0.0002077474164252635, "clip_ratio/region_mean": 0.0026503042681724764, "epoch": 0.09463976340059149, "grad_norm": 0.15156012773513794, "learning_rate": 1e-06, "loss": 0.0416, "step": 1014 }, { "clip_ratio/high_max": 0.00254740835953271, "clip_ratio/high_mean": 0.001091957510652719, "clip_ratio/low_mean": 0.0013925210241723107, "clip_ratio/low_min": 7.374403958237963e-05, "clip_ratio/region_mean": 0.002484478522092104, "epoch": 0.09473309650059208, "grad_norm": 0.36712679266929626, "learning_rate": 1e-06, "loss": 0.0462, "step": 1015 }, { "clip_ratio/high_max": 0.002153635083232075, "clip_ratio/high_mean": 0.000936221453230246, "clip_ratio/low_mean": 0.001566298284160439, "clip_ratio/low_min": 0.0002341756844543852, "clip_ratio/region_mean": 0.0025025196955539286, "epoch": 0.09482642960059266, "grad_norm": 0.19550590217113495, "learning_rate": 1e-06, "loss": 0.0636, "step": 1016 }, { "clip_ratio/high_max": 0.0023482123142457567, "clip_ratio/high_mean": 0.0009431124162802007, "clip_ratio/low_mean": 0.0014192474154697265, "clip_ratio/low_min": 0.00010341357301513199, "clip_ratio/region_mean": 0.002362359802646097, "epoch": 0.09491976270059325, "grad_norm": 0.23598073422908783, "learning_rate": 1e-06, "loss": 0.0211, "step": 1017 }, { "clip_ratio/high_max": 0.0027490474603837356, "clip_ratio/high_mean": 0.0010602396905596834, "clip_ratio/low_mean": 0.001699717657174915, "clip_ratio/low_min": 0.00023243802570505068, "clip_ratio/region_mean": 0.002759957358648535, "epoch": 0.09501309580059383, "grad_norm": 0.7179739475250244, "learning_rate": 1e-06, "loss": 0.0415, "step": 1018 }, { "clip_ratio/high_max": 0.002744025732681621, "clip_ratio/high_mean": 0.001063942392647732, "clip_ratio/low_mean": 0.0014018245601619128, "clip_ratio/low_min": 0.00014277450191002572, "clip_ratio/region_mean": 0.0024657669055159204, "epoch": 0.09510642890059441, "grad_norm": 0.1302684247493744, "learning_rate": 1e-06, "loss": 0.0016, "step": 1019 }, { "clip_ratio/high_max": 0.0024234028824139386, "clip_ratio/high_mean": 0.0010589727171463892, "clip_ratio/low_mean": 0.0016759285281295888, "clip_ratio/low_min": 5.259621684672311e-05, "clip_ratio/region_mean": 0.0027349011943442747, "epoch": 0.095199762000595, "grad_norm": 0.20395095646381378, "learning_rate": 1e-06, "loss": 0.0386, "step": 1020 }, { "clip_ratio/high_max": 0.0026095060384250246, "clip_ratio/high_mean": 0.0009672060405137017, "clip_ratio/low_mean": 0.0015556010039290413, "clip_ratio/low_min": 3.430506694712676e-05, "clip_ratio/region_mean": 0.0025228070953744464, "epoch": 0.09529309510059558, "grad_norm": 0.1260862648487091, "learning_rate": 1e-06, "loss": 0.0377, "step": 1021 }, { "clip_ratio/high_max": 0.0027780027958215214, "clip_ratio/high_mean": 0.0011060325232392643, "clip_ratio/low_mean": 0.0015691542794229463, "clip_ratio/low_min": 0.00020678279452113202, "clip_ratio/region_mean": 0.002675186849955935, "epoch": 0.09538642820059616, "grad_norm": 0.1276027113199234, "learning_rate": 1e-06, "loss": 0.0177, "step": 1022 }, { "clip_ratio/high_max": 0.0025821499148150906, "clip_ratio/high_mean": 0.0011101393720309716, "clip_ratio/low_mean": 0.0013182413022150286, "clip_ratio/low_min": 0.0001094849712899304, "clip_ratio/region_mean": 0.0024283806851599365, "epoch": 0.09547976130059675, "grad_norm": 1.5559098720550537, "learning_rate": 1e-06, "loss": 0.0159, "step": 1023 }, { "clip_ratio/high_max": 0.0022457818704424426, "clip_ratio/high_mean": 0.0010378893275628798, "clip_ratio/low_mean": 0.0014103770954534411, "clip_ratio/low_min": 8.981925384432543e-05, "clip_ratio/region_mean": 0.0024482663720846176, "epoch": 0.09557309440059733, "grad_norm": 0.10609094053506851, "learning_rate": 1e-06, "loss": 0.0027, "step": 1024 }, { "clip_ratio/high_max": 0.0028740140041918494, "clip_ratio/high_mean": 0.0011669460982375313, "clip_ratio/low_mean": 0.0016561232987442054, "clip_ratio/low_min": 0.0002337534406251507, "clip_ratio/region_mean": 0.002823069429723546, "epoch": 0.09566642750059791, "grad_norm": 0.17098350822925568, "learning_rate": 1e-06, "loss": 0.0291, "step": 1025 }, { "clip_ratio/high_max": 0.0022569885732082184, "clip_ratio/high_mean": 0.0009531265550322132, "clip_ratio/low_mean": 0.001427520764991641, "clip_ratio/low_min": 0.0001411345147062093, "clip_ratio/region_mean": 0.00238064733275678, "epoch": 0.0957597606005985, "grad_norm": 0.11011157184839249, "learning_rate": 1e-06, "loss": 0.0316, "step": 1026 }, { "clip_ratio/high_max": 0.0026150273261009715, "clip_ratio/high_mean": 0.0010006312004406936, "clip_ratio/low_mean": 0.0014856450470688287, "clip_ratio/low_min": 0.0001168062153737992, "clip_ratio/region_mean": 0.0024862762875272892, "epoch": 0.09585309370059908, "grad_norm": 0.7985674142837524, "learning_rate": 1e-06, "loss": 0.0138, "step": 1027 }, { "clip_ratio/high_max": 0.0026076947142428253, "clip_ratio/high_mean": 0.0010348516934755025, "clip_ratio/low_mean": 0.001373599618091248, "clip_ratio/low_min": 5.559255259868223e-05, "clip_ratio/region_mean": 0.0024084513643174432, "epoch": 0.09594642680059967, "grad_norm": 0.12488450109958649, "learning_rate": 1e-06, "loss": -0.0003, "step": 1028 }, { "clip_ratio/high_max": 0.0024484193927492015, "clip_ratio/high_mean": 0.0010525511352170724, "clip_ratio/low_mean": 0.0017963830769076594, "clip_ratio/low_min": 0.0001597672871866962, "clip_ratio/region_mean": 0.002848934345820453, "epoch": 0.09603975990060025, "grad_norm": 0.16543079912662506, "learning_rate": 1e-06, "loss": 0.0302, "step": 1029 }, { "clip_ratio/high_max": 0.002799492249323521, "clip_ratio/high_mean": 0.0011888555345649365, "clip_ratio/low_mean": 0.0010550299421083764, "clip_ratio/low_min": 2.1349958842620254e-05, "clip_ratio/region_mean": 0.002243885443022009, "epoch": 0.09613309300060083, "grad_norm": 0.11115060746669769, "learning_rate": 1e-06, "loss": -0.0416, "step": 1030 }, { "clip_ratio/high_max": 0.002123236368788639, "clip_ratio/high_mean": 0.0009793448716663988, "clip_ratio/low_mean": 0.0012815610425604973, "clip_ratio/low_min": 8.41796818349394e-05, "clip_ratio/region_mean": 0.002260905850562267, "epoch": 0.09622642610060142, "grad_norm": 0.11418361961841583, "learning_rate": 1e-06, "loss": -0.0207, "step": 1031 }, { "clip_ratio/high_max": 0.0026167353498749435, "clip_ratio/high_mean": 0.0011643699035630561, "clip_ratio/low_mean": 0.0014643017857451923, "clip_ratio/low_min": 8.175284892786294e-05, "clip_ratio/region_mean": 0.0026286716529284604, "epoch": 0.096319759200602, "grad_norm": 6.70403528213501, "learning_rate": 1e-06, "loss": 0.0167, "step": 1032 }, { "clip_ratio/high_max": 0.0023788318503648043, "clip_ratio/high_mean": 0.001081062542652944, "clip_ratio/low_mean": 0.001619060025404906, "clip_ratio/low_min": 9.49936656979844e-05, "clip_ratio/region_mean": 0.002700122509850189, "epoch": 0.09641309230060258, "grad_norm": 0.11737395823001862, "learning_rate": 1e-06, "loss": 0.0376, "step": 1033 }, { "clip_ratio/high_max": 0.0024865820232662372, "clip_ratio/high_mean": 0.0010667846618162002, "clip_ratio/low_mean": 0.0015340850986831356, "clip_ratio/low_min": 0.00015708232058386784, "clip_ratio/region_mean": 0.0026008698259829544, "epoch": 0.09650642540060317, "grad_norm": 0.11282369494438171, "learning_rate": 1e-06, "loss": 0.011, "step": 1034 }, { "clip_ratio/high_max": 0.0024976714412332512, "clip_ratio/high_mean": 0.0011453477200120687, "clip_ratio/low_mean": 0.001466803752919077, "clip_ratio/low_min": 9.28330046008341e-05, "clip_ratio/region_mean": 0.0026121515038539656, "epoch": 0.09659975850060375, "grad_norm": 2.127736806869507, "learning_rate": 1e-06, "loss": 0.0361, "step": 1035 }, { "clip_ratio/high_max": 0.002254596503917128, "clip_ratio/high_mean": 0.0010046278730442282, "clip_ratio/low_mean": 0.001678002932749223, "clip_ratio/low_min": 0.0002276703821735282, "clip_ratio/region_mean": 0.0026826307584997267, "epoch": 0.09669309160060433, "grad_norm": 0.1636512130498886, "learning_rate": 1e-06, "loss": 0.0349, "step": 1036 }, { "clip_ratio/high_max": 0.0026917438226519153, "clip_ratio/high_mean": 0.0010972836971632205, "clip_ratio/low_mean": 0.0012886290951428236, "clip_ratio/low_min": 8.287899800052401e-05, "clip_ratio/region_mean": 0.002385912826866843, "epoch": 0.09678642470060492, "grad_norm": 0.10557028651237488, "learning_rate": 1e-06, "loss": 0.016, "step": 1037 }, { "clip_ratio/high_max": 0.0021997827006998705, "clip_ratio/high_mean": 0.0009391536532348255, "clip_ratio/low_mean": 0.0017274617857765406, "clip_ratio/low_min": 0.00034551463068055455, "clip_ratio/region_mean": 0.0026666154371923767, "epoch": 0.0968797578006055, "grad_norm": 0.12786470353603363, "learning_rate": 1e-06, "loss": 0.0214, "step": 1038 }, { "clip_ratio/high_max": 0.0024857334574335255, "clip_ratio/high_mean": 0.0009328208161605289, "clip_ratio/low_mean": 0.0014514240938297007, "clip_ratio/low_min": 3.765109431697056e-05, "clip_ratio/region_mean": 0.0023842448863433674, "epoch": 0.09697309090060609, "grad_norm": 0.1199268326163292, "learning_rate": 1e-06, "loss": 0.0579, "step": 1039 }, { "clip_ratio/high_max": 0.0027313846221659333, "clip_ratio/high_mean": 0.001156201626145048, "clip_ratio/low_mean": 0.001556612209242303, "clip_ratio/low_min": 6.832637154730037e-05, "clip_ratio/region_mean": 0.0027128137517138384, "epoch": 0.09706642400060667, "grad_norm": 0.13708259165287018, "learning_rate": 1e-06, "loss": -0.006, "step": 1040 }, { "clip_ratio/high_max": 0.0024884929007384926, "clip_ratio/high_mean": 0.0010645560869306792, "clip_ratio/low_mean": 0.0014780164892727043, "clip_ratio/low_min": 0.00013456806209433125, "clip_ratio/region_mean": 0.002542572532547638, "epoch": 0.09715975710060724, "grad_norm": 0.12088126689195633, "learning_rate": 1e-06, "loss": -0.0005, "step": 1041 }, { "clip_ratio/high_max": 0.002451437816489488, "clip_ratio/high_mean": 0.0010310456145816715, "clip_ratio/low_mean": 0.0017180982686113566, "clip_ratio/low_min": 0.00020637681336665992, "clip_ratio/region_mean": 0.002749143859546166, "epoch": 0.09725309020060784, "grad_norm": 0.12734945118427277, "learning_rate": 1e-06, "loss": 0.0598, "step": 1042 }, { "clip_ratio/high_max": 0.0024859129698597826, "clip_ratio/high_mean": 0.0009966537127183983, "clip_ratio/low_mean": 0.0019490449649310904, "clip_ratio/low_min": 0.00012581245209730696, "clip_ratio/region_mean": 0.002945698710391298, "epoch": 0.09734642330060841, "grad_norm": 3.4777934551239014, "learning_rate": 1e-06, "loss": 0.042, "step": 1043 }, { "clip_ratio/high_max": 0.0023609272102476098, "clip_ratio/high_mean": 0.0010063399295177078, "clip_ratio/low_mean": 0.0016369394943467341, "clip_ratio/low_min": 0.00016572749882470816, "clip_ratio/region_mean": 0.0026432793820276856, "epoch": 0.09743975640060899, "grad_norm": 0.19785448908805847, "learning_rate": 1e-06, "loss": 0.0825, "step": 1044 }, { "clip_ratio/high_max": 0.0019945313106290996, "clip_ratio/high_mean": 0.0008731581419851864, "clip_ratio/low_mean": 0.0016990704716590699, "clip_ratio/low_min": 0.0002243034559796797, "clip_ratio/region_mean": 0.002572228586359415, "epoch": 0.09753308950060958, "grad_norm": 0.13721875846385956, "learning_rate": 1e-06, "loss": 0.052, "step": 1045 }, { "clip_ratio/high_max": 0.0022703743015881628, "clip_ratio/high_mean": 0.0009585947482264601, "clip_ratio/low_mean": 0.0017468062724219635, "clip_ratio/low_min": 0.00012227893603267148, "clip_ratio/region_mean": 0.002705401049752254, "epoch": 0.09762642260061016, "grad_norm": 0.1105123832821846, "learning_rate": 1e-06, "loss": 0.0589, "step": 1046 }, { "clip_ratio/high_max": 0.002762889016594272, "clip_ratio/high_mean": 0.0010930320095212664, "clip_ratio/low_mean": 0.0017191348488267977, "clip_ratio/low_min": 0.0002919521739386255, "clip_ratio/region_mean": 0.002812166785588488, "epoch": 0.09771975570061076, "grad_norm": 3.411590337753296, "learning_rate": 1e-06, "loss": 0.0427, "step": 1047 }, { "clip_ratio/high_max": 0.002408101012406405, "clip_ratio/high_mean": 0.0011082909841206856, "clip_ratio/low_mean": 0.0014730837247043382, "clip_ratio/low_min": 6.010919787513558e-05, "clip_ratio/region_mean": 0.0025813747488427907, "epoch": 0.09781308880061133, "grad_norm": 0.1541268527507782, "learning_rate": 1e-06, "loss": -0.0011, "step": 1048 }, { "clip_ratio/high_max": 0.0025276943488279358, "clip_ratio/high_mean": 0.0010492534638615325, "clip_ratio/low_mean": 0.0015621190177625977, "clip_ratio/low_min": 0.0002017591305047972, "clip_ratio/region_mean": 0.0026113725107279606, "epoch": 0.09790642190061191, "grad_norm": 0.7781962156295776, "learning_rate": 1e-06, "loss": 0.0434, "step": 1049 }, { "clip_ratio/high_max": 0.0020950286561856046, "clip_ratio/high_mean": 0.0010302228583896067, "clip_ratio/low_mean": 0.001510117119323695, "clip_ratio/low_min": 0.0001770286335158744, "clip_ratio/region_mean": 0.0025403399995411746, "epoch": 0.0979997550006125, "grad_norm": 1.9998055696487427, "learning_rate": 1e-06, "loss": 0.029, "step": 1050 }, { "clip_ratio/high_max": 0.0023859030116000213, "clip_ratio/high_mean": 0.0011139920188725227, "clip_ratio/low_mean": 0.001309117658820469, "clip_ratio/low_min": 7.262455801537726e-05, "clip_ratio/region_mean": 0.002423109697701875, "epoch": 0.09809308810061308, "grad_norm": 0.10248728841543198, "learning_rate": 1e-06, "loss": -0.0248, "step": 1051 }, { "clip_ratio/high_max": 0.002476397618011106, "clip_ratio/high_mean": 0.0010308366472600028, "clip_ratio/low_mean": 0.0016942097463470418, "clip_ratio/low_min": 0.00016970499382296111, "clip_ratio/region_mean": 0.0027250464117969386, "epoch": 0.09818642120061366, "grad_norm": 0.8370645642280579, "learning_rate": 1e-06, "loss": 0.0219, "step": 1052 }, { "clip_ratio/high_max": 0.002790626353089465, "clip_ratio/high_mean": 0.0010914618642345886, "clip_ratio/low_mean": 0.0014964292495278642, "clip_ratio/low_min": 0.00016038402463891543, "clip_ratio/region_mean": 0.0025878911474137567, "epoch": 0.09827975430061425, "grad_norm": 0.12005890160799026, "learning_rate": 1e-06, "loss": 0.0197, "step": 1053 }, { "clip_ratio/high_max": 0.002567402145359665, "clip_ratio/high_mean": 0.0010272291983710602, "clip_ratio/low_mean": 0.0017165825884148944, "clip_ratio/low_min": 0.00010195728646067437, "clip_ratio/region_mean": 0.0027438118268037215, "epoch": 0.09837308740061483, "grad_norm": 0.3239002823829651, "learning_rate": 1e-06, "loss": 0.0266, "step": 1054 }, { "clip_ratio/high_max": 0.002806235548632685, "clip_ratio/high_mean": 0.0011477070383989485, "clip_ratio/low_mean": 0.0015595470067637507, "clip_ratio/low_min": 0.00023611872438777937, "clip_ratio/region_mean": 0.002707254076085519, "epoch": 0.09846642050061541, "grad_norm": 0.11918503791093826, "learning_rate": 1e-06, "loss": 0.0322, "step": 1055 }, { "clip_ratio/high_max": 0.0020969543620594777, "clip_ratio/high_mean": 0.0009565349537297152, "clip_ratio/low_mean": 0.0016179051890503615, "clip_ratio/low_min": 0.000247909571044147, "clip_ratio/region_mean": 0.002574440120952204, "epoch": 0.098559753600616, "grad_norm": 0.34376922249794006, "learning_rate": 1e-06, "loss": 0.0296, "step": 1056 }, { "clip_ratio/high_max": 0.0026545548280410003, "clip_ratio/high_mean": 0.001156085809270735, "clip_ratio/low_mean": 0.0009134982683463022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002069584108539857, "completions/clipped_ratio": 0.011631556919642905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 600.4644775390625, "completions/mean_terminated_length": 559.3274536132812, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.09865308670061658, "grad_norm": 19376.447265625, "learning_rate": 1e-06, "loss": 543.0246, "num_tokens": 812901776.0, "reward": 0.581551730632782, "reward_std": 0.1905154287815094, "rewards/simpleverify_reward/mean": 0.5815516710281372, "rewards/simpleverify_reward/std": 0.4933067262172699, "step": 1057 }, { "clip_ratio/high_max": 0.0027160520912730135, "clip_ratio/high_mean": 0.001073583527613664, "clip_ratio/low_mean": 0.0010691176757973153, "clip_ratio/low_min": 7.919052404758986e-05, "clip_ratio/region_mean": 0.002142701174307149, "epoch": 0.09874641980061717, "grad_norm": 66346643456.0, "learning_rate": 1e-06, "loss": 10626219.0, "step": 1058 }, { "clip_ratio/high_max": 0.0030447074823314324, "clip_ratio/high_mean": 0.0011915833711100277, "clip_ratio/low_mean": 0.0009738258686411427, "clip_ratio/low_min": 4.312604050937807e-05, "clip_ratio/region_mean": 0.0021654092997778207, "epoch": 0.09883975290061775, "grad_norm": 89.51912689208984, "learning_rate": 1e-06, "loss": 0.1202, "step": 1059 }, { "clip_ratio/high_max": 0.0026044099940918386, "clip_ratio/high_mean": 0.0011421316157793626, "clip_ratio/low_mean": 0.0010248394537484273, "clip_ratio/low_min": 0.00012890936523035634, "clip_ratio/region_mean": 0.0021669710549758747, "epoch": 0.09893308600061833, "grad_norm": 70.45943450927734, "learning_rate": 1e-06, "loss": 0.0545, "step": 1060 }, { "clip_ratio/high_max": 0.0026771254342747852, "clip_ratio/high_mean": 0.0011095823210780509, "clip_ratio/low_mean": 0.001226841739480733, "clip_ratio/low_min": 0.00021057507183286361, "clip_ratio/region_mean": 0.0023364240551018156, "epoch": 0.09902641910061892, "grad_norm": 158818.5, "learning_rate": 1e-06, "loss": 18.1625, "step": 1061 }, { "clip_ratio/high_max": 0.0028003475308651105, "clip_ratio/high_mean": 0.0011146185552206589, "clip_ratio/low_mean": 0.0010491998546058312, "clip_ratio/low_min": 0.00010102821215696167, "clip_ratio/region_mean": 0.0021638184334733523, "epoch": 0.0991197522006195, "grad_norm": 254.01470947265625, "learning_rate": 1e-06, "loss": 0.7729, "step": 1062 }, { "clip_ratio/high_max": 0.0029343472051550634, "clip_ratio/high_mean": 0.001039605114783626, "clip_ratio/low_mean": 0.0010474810605956009, "clip_ratio/low_min": 2.8839199330832344e-05, "clip_ratio/region_mean": 0.0020870862063020468, "epoch": 0.09921308530062008, "grad_norm": 91345.3515625, "learning_rate": 1e-06, "loss": 39.5631, "step": 1063 }, { "clip_ratio/high_max": 0.0027216048329137266, "clip_ratio/high_mean": 0.0011726452285074629, "clip_ratio/low_mean": 0.0010962007545458619, "clip_ratio/low_min": 8.783598241279833e-05, "clip_ratio/region_mean": 0.002268845957587473, "epoch": 0.09930641840062067, "grad_norm": 0.4986717402935028, "learning_rate": 1e-06, "loss": 0.0023, "step": 1064 }, { "clip_ratio/high_max": 0.0031283964417525567, "clip_ratio/high_mean": 0.001216457396367332, "clip_ratio/low_mean": 0.0012029144691041438, "clip_ratio/low_min": 9.832020987232681e-05, "clip_ratio/region_mean": 0.0024193718636524864, "epoch": 0.09939975150062125, "grad_norm": 11.444372177124023, "learning_rate": 1e-06, "loss": 0.0429, "step": 1065 }, { "clip_ratio/high_max": 0.0027087220732937567, "clip_ratio/high_mean": 0.0011457652872195467, "clip_ratio/low_mean": 0.001239418390468927, "clip_ratio/low_min": 0.00011277338217041688, "clip_ratio/region_mean": 0.0023851836886024103, "epoch": 0.09949308460062183, "grad_norm": 0.13417837023735046, "learning_rate": 1e-06, "loss": 0.0297, "step": 1066 }, { "clip_ratio/high_max": 0.0024075435285340063, "clip_ratio/high_mean": 0.0010678663129510824, "clip_ratio/low_mean": 0.0011138090485474095, "clip_ratio/low_min": 7.473115692846477e-05, "clip_ratio/region_mean": 0.0021816753142047673, "epoch": 0.09958641770062242, "grad_norm": 0.11461979895830154, "learning_rate": 1e-06, "loss": 0.0398, "step": 1067 }, { "clip_ratio/high_max": 0.0030587448563892394, "clip_ratio/high_mean": 0.001280219843465602, "clip_ratio/low_mean": 0.00138875340053346, "clip_ratio/low_min": 0.00014916058535163756, "clip_ratio/region_mean": 0.0026689731821534224, "epoch": 0.099679750800623, "grad_norm": 82.8593978881836, "learning_rate": 1e-06, "loss": 0.0404, "step": 1068 }, { "clip_ratio/high_max": 0.0026132461498491466, "clip_ratio/high_mean": 0.0010389417220721953, "clip_ratio/low_mean": 0.0012845349720009835, "clip_ratio/low_min": 9.349343963549472e-05, "clip_ratio/region_mean": 0.0023234767286339775, "epoch": 0.09977308390062359, "grad_norm": 63.38811111450195, "learning_rate": 1e-06, "loss": 0.0582, "step": 1069 }, { "clip_ratio/high_max": 0.0028535951023513917, "clip_ratio/high_mean": 0.001180681476398604, "clip_ratio/low_mean": 0.0015972310829965863, "clip_ratio/low_min": 0.00032978928356897086, "clip_ratio/region_mean": 0.0027779125375673175, "epoch": 0.09986641700062417, "grad_norm": 0.11527776718139648, "learning_rate": 1e-06, "loss": 0.0521, "step": 1070 }, { "clip_ratio/high_max": 0.0027820640680147335, "clip_ratio/high_mean": 0.0011365437912900234, "clip_ratio/low_mean": 0.001479933565860847, "clip_ratio/low_min": 0.00015167714263952803, "clip_ratio/region_mean": 0.0026164773662458174, "epoch": 0.09995975010062474, "grad_norm": 4.616727352142334, "learning_rate": 1e-06, "loss": 0.0673, "step": 1071 }, { "clip_ratio/high_max": 0.002767252881312743, "clip_ratio/high_mean": 0.0011979056762356777, "clip_ratio/low_mean": 0.0013403817683865782, "clip_ratio/low_min": 3.452340661169728e-05, "clip_ratio/region_mean": 0.002538287459174171, "epoch": 0.10005308320062534, "grad_norm": 0.4958588182926178, "learning_rate": 1e-06, "loss": 0.0332, "step": 1072 }, { "clip_ratio/high_max": 0.0024288301792694256, "clip_ratio/high_mean": 0.0010714032978285104, "clip_ratio/low_mean": 0.0016034773434512317, "clip_ratio/low_min": 0.00019295911351946415, "clip_ratio/region_mean": 0.00267488067765953, "epoch": 0.10014641630062592, "grad_norm": 0.11871877312660217, "learning_rate": 1e-06, "loss": 0.026, "step": 1073 }, { "clip_ratio/high_max": 0.0027953477183473296, "clip_ratio/high_mean": 0.0011518957808220875, "clip_ratio/low_mean": 0.0014157051155052613, "clip_ratio/low_min": 0.00021085304251755588, "clip_ratio/region_mean": 0.0025676008881418966, "epoch": 0.1002397494006265, "grad_norm": 0.1406594067811966, "learning_rate": 1e-06, "loss": 0.0442, "step": 1074 }, { "clip_ratio/high_max": 0.0030373085974133573, "clip_ratio/high_mean": 0.001308071288804058, "clip_ratio/low_mean": 0.0016581290401518345, "clip_ratio/low_min": 0.0002750885123532498, "clip_ratio/region_mean": 0.0029662003071280196, "epoch": 0.10033308250062709, "grad_norm": 0.12567895650863647, "learning_rate": 1e-06, "loss": 0.0292, "step": 1075 }, { "clip_ratio/high_max": 0.003033902816241607, "clip_ratio/high_mean": 0.0010880818226723932, "clip_ratio/low_mean": 0.0017523367241665255, "clip_ratio/low_min": 0.00022834419814898865, "clip_ratio/region_mean": 0.0028404185868566856, "epoch": 0.10042641560062766, "grad_norm": 8.243338584899902, "learning_rate": 1e-06, "loss": 0.0674, "step": 1076 }, { "clip_ratio/high_max": 0.003005389546160586, "clip_ratio/high_mean": 0.0012947889335919172, "clip_ratio/low_mean": 0.0017762050847522914, "clip_ratio/low_min": 8.96064484550152e-05, "clip_ratio/region_mean": 0.0030709940619999543, "epoch": 0.10051974870062824, "grad_norm": 0.4808621406555176, "learning_rate": 1e-06, "loss": 0.0332, "step": 1077 }, { "clip_ratio/high_max": 0.003207053247024305, "clip_ratio/high_mean": 0.001290653926844243, "clip_ratio/low_mean": 0.0013424466233118437, "clip_ratio/low_min": 0.00015511627316300292, "clip_ratio/region_mean": 0.002633100564708002, "epoch": 0.10061308180062883, "grad_norm": 0.43675824999809265, "learning_rate": 1e-06, "loss": -0.0176, "step": 1078 }, { "clip_ratio/high_max": 0.002586764945590403, "clip_ratio/high_mean": 0.0011011242331733229, "clip_ratio/low_mean": 0.0015010885035735555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002602212734927889, "epoch": 0.10070641490062941, "grad_norm": 15.26230525970459, "learning_rate": 1e-06, "loss": 0.0389, "step": 1079 }, { "clip_ratio/high_max": 0.0032891732480493374, "clip_ratio/high_mean": 0.0012608786255441373, "clip_ratio/low_mean": 0.0017761425551725551, "clip_ratio/low_min": 0.00014223435573512688, "clip_ratio/region_mean": 0.003037021138879936, "epoch": 0.10079974800063, "grad_norm": 9.487072944641113, "learning_rate": 1e-06, "loss": 0.0118, "step": 1080 }, { "clip_ratio/high_max": 0.002668848777830135, "clip_ratio/high_mean": 0.001214121362863807, "clip_ratio/low_mean": 0.0013416658512142021, "clip_ratio/low_min": 3.760221261472907e-05, "clip_ratio/region_mean": 0.002555787214078009, "epoch": 0.10089308110063058, "grad_norm": 0.14420552551746368, "learning_rate": 1e-06, "loss": -0.0286, "step": 1081 }, { "clip_ratio/high_max": 0.0030078307099756785, "clip_ratio/high_mean": 0.001245437262696214, "clip_ratio/low_mean": 0.0016068250370153692, "clip_ratio/low_min": 0.0001333706150035141, "clip_ratio/region_mean": 0.002852262361557223, "epoch": 0.10098641420063116, "grad_norm": 384.564208984375, "learning_rate": 1e-06, "loss": 0.067, "step": 1082 }, { "clip_ratio/high_max": 0.0027996929820801597, "clip_ratio/high_mean": 0.001112127891246928, "clip_ratio/low_mean": 0.0017498916749900673, "clip_ratio/low_min": 0.0001482537891206448, "clip_ratio/region_mean": 0.0028620195807889104, "epoch": 0.10107974730063175, "grad_norm": 0.15250636637210846, "learning_rate": 1e-06, "loss": 0.0326, "step": 1083 }, { "clip_ratio/high_max": 0.002414898910501506, "clip_ratio/high_mean": 0.00102338167198468, "clip_ratio/low_mean": 0.0014487097869277932, "clip_ratio/low_min": 5.268881068332121e-05, "clip_ratio/region_mean": 0.002472091458912473, "epoch": 0.10117308040063233, "grad_norm": 0.10656650364398956, "learning_rate": 1e-06, "loss": 0.0528, "step": 1084 }, { "clip_ratio/high_max": 0.002807499789923895, "clip_ratio/high_mean": 0.001080316462321207, "clip_ratio/low_mean": 0.0015384740800072905, "clip_ratio/low_min": 0.00013388932711677626, "clip_ratio/region_mean": 0.0026187905386905186, "epoch": 0.10126641350063291, "grad_norm": 0.11980854719877243, "learning_rate": 1e-06, "loss": 0.0461, "step": 1085 }, { "clip_ratio/high_max": 0.0025518119509797543, "clip_ratio/high_mean": 0.0010949425886792596, "clip_ratio/low_mean": 0.001347809306025738, "clip_ratio/low_min": 8.174173126462847e-05, "clip_ratio/region_mean": 0.002442751923808828, "epoch": 0.1013597466006335, "grad_norm": 0.10463058203458786, "learning_rate": 1e-06, "loss": 0.0182, "step": 1086 }, { "clip_ratio/high_max": 0.002697751857340336, "clip_ratio/high_mean": 0.0010930513381026685, "clip_ratio/low_mean": 0.001615939756447915, "clip_ratio/low_min": 0.00015889149381109746, "clip_ratio/region_mean": 0.002708991101826541, "epoch": 0.10145307970063408, "grad_norm": 3.475515604019165, "learning_rate": 1e-06, "loss": 0.0354, "step": 1087 }, { "clip_ratio/high_max": 0.002832560167007614, "clip_ratio/high_mean": 0.0012938145173393423, "clip_ratio/low_mean": 0.0016758460442360956, "clip_ratio/low_min": 0.0003195853187207831, "clip_ratio/region_mean": 0.0029696605852223, "epoch": 0.10154641280063466, "grad_norm": 8.802103042602539, "learning_rate": 1e-06, "loss": 0.718, "step": 1088 }, { "clip_ratio/high_max": 0.0029150086775189266, "clip_ratio/high_mean": 0.0012245245488884393, "clip_ratio/low_mean": 0.0015771983307786286, "clip_ratio/low_min": 0.00011248974260524847, "clip_ratio/region_mean": 0.002801722817821428, "epoch": 0.10163974590063525, "grad_norm": 24.858625411987305, "learning_rate": 1e-06, "loss": 0.0467, "step": 1089 }, { "clip_ratio/high_max": 0.0029586773162009194, "clip_ratio/high_mean": 0.0012060261287842877, "clip_ratio/low_mean": 0.0014964021011110162, "clip_ratio/low_min": 0.00019901075575035065, "clip_ratio/region_mean": 0.002702428311749827, "epoch": 0.10173307900063583, "grad_norm": 0.24916265904903412, "learning_rate": 1e-06, "loss": 0.0032, "step": 1090 }, { "clip_ratio/high_max": 0.002863522953703068, "clip_ratio/high_mean": 0.0011764052433136385, "clip_ratio/low_mean": 0.0016461819650430698, "clip_ratio/low_min": 6.304127418843564e-05, "clip_ratio/region_mean": 0.0028225872592884116, "epoch": 0.10182641210063642, "grad_norm": 0.31088370084762573, "learning_rate": 1e-06, "loss": 0.0063, "step": 1091 }, { "clip_ratio/high_max": 0.002600468993477989, "clip_ratio/high_mean": 0.0010884742841881234, "clip_ratio/low_mean": 0.0014470604583038948, "clip_ratio/low_min": 0.00013499182750820182, "clip_ratio/region_mean": 0.0025355347534059547, "epoch": 0.101919745200637, "grad_norm": 0.3091491460800171, "learning_rate": 1e-06, "loss": 0.0225, "step": 1092 }, { "clip_ratio/high_max": 0.0025902636552928016, "clip_ratio/high_mean": 0.0011193327482033055, "clip_ratio/low_mean": 0.0015518261843681103, "clip_ratio/low_min": 9.310906716564205e-05, "clip_ratio/region_mean": 0.002671158901648596, "epoch": 0.10201307830063758, "grad_norm": 0.11468497663736343, "learning_rate": 1e-06, "loss": 0.0218, "step": 1093 }, { "clip_ratio/high_max": 0.0028588921268237755, "clip_ratio/high_mean": 0.0010601534995657858, "clip_ratio/low_mean": 0.001494357522460632, "clip_ratio/low_min": 0.0001017522627080325, "clip_ratio/region_mean": 0.0025545109892846085, "epoch": 0.10210641140063817, "grad_norm": 0.10773865133523941, "learning_rate": 1e-06, "loss": 0.0226, "step": 1094 }, { "clip_ratio/high_max": 0.0023960199396242388, "clip_ratio/high_mean": 0.0010427312627143692, "clip_ratio/low_mean": 0.0017747869387676474, "clip_ratio/low_min": 0.0001251918583875522, "clip_ratio/region_mean": 0.002817518194206059, "epoch": 0.10219974450063875, "grad_norm": 4.2700042724609375, "learning_rate": 1e-06, "loss": 0.066, "step": 1095 }, { "clip_ratio/high_max": 0.0024806067522149533, "clip_ratio/high_mean": 0.0011294237865513423, "clip_ratio/low_mean": 0.0014996684694779105, "clip_ratio/low_min": 0.00018065202675643377, "clip_ratio/region_mean": 0.0026290922760381363, "epoch": 0.10229307760063933, "grad_norm": 17.473588943481445, "learning_rate": 1e-06, "loss": 0.024, "step": 1096 }, { "clip_ratio/high_max": 0.00234174382785568, "clip_ratio/high_mean": 0.0010099523788085207, "clip_ratio/low_mean": 0.0015370887176686665, "clip_ratio/low_min": 0.00012869787133240607, "clip_ratio/region_mean": 0.0025470411419519223, "epoch": 0.10238641070063992, "grad_norm": 2654.74755859375, "learning_rate": 1e-06, "loss": 0.6887, "step": 1097 }, { "clip_ratio/high_max": 0.00292250463826349, "clip_ratio/high_mean": 0.0012656719081860501, "clip_ratio/low_mean": 0.0014556308160535991, "clip_ratio/low_min": 0.0001274228488910012, "clip_ratio/region_mean": 0.0027213027569814585, "epoch": 0.1024797438006405, "grad_norm": 0.1260678917169571, "learning_rate": 1e-06, "loss": -0.0008, "step": 1098 }, { "clip_ratio/high_max": 0.002370185095060151, "clip_ratio/high_mean": 0.0010993133673764532, "clip_ratio/low_mean": 0.0017103398276958615, "clip_ratio/low_min": 0.00018475731303624343, "clip_ratio/region_mean": 0.002809653196891304, "epoch": 0.10257307690064109, "grad_norm": 0.15316139161586761, "learning_rate": 1e-06, "loss": 0.006, "step": 1099 }, { "clip_ratio/high_max": 0.002844724753231276, "clip_ratio/high_mean": 0.0011724418509402312, "clip_ratio/low_mean": 0.0017135146263171919, "clip_ratio/low_min": 9.519406557956245e-05, "clip_ratio/region_mean": 0.002885956477257423, "epoch": 0.10266641000064167, "grad_norm": 0.5944458842277527, "learning_rate": 1e-06, "loss": 0.0064, "step": 1100 }, { "clip_ratio/high_max": 0.002321340980415698, "clip_ratio/high_mean": 0.0010224503366771387, "clip_ratio/low_mean": 0.0015442519179487135, "clip_ratio/low_min": 4.808463199879043e-05, "clip_ratio/region_mean": 0.002566702227341011, "epoch": 0.10275974310064225, "grad_norm": 0.32268863916397095, "learning_rate": 1e-06, "loss": 0.0417, "step": 1101 }, { "clip_ratio/high_max": 0.0024788510345388204, "clip_ratio/high_mean": 0.0010483164223842323, "clip_ratio/low_mean": 0.0014292606501840055, "clip_ratio/low_min": 0.00012038344266329659, "clip_ratio/region_mean": 0.0024775770434644073, "epoch": 0.10285307620064284, "grad_norm": 0.13296392560005188, "learning_rate": 1e-06, "loss": 0.0215, "step": 1102 }, { "clip_ratio/high_max": 0.0025340823813166935, "clip_ratio/high_mean": 0.000980192637143773, "clip_ratio/low_mean": 0.0014907277763995808, "clip_ratio/low_min": 0.00026823886946658604, "clip_ratio/region_mean": 0.0024709203935344703, "epoch": 0.10294640930064342, "grad_norm": 0.11375102400779724, "learning_rate": 1e-06, "loss": 0.0589, "step": 1103 }, { "clip_ratio/high_max": 0.003034240158740431, "clip_ratio/high_mean": 0.001298970168136293, "clip_ratio/low_mean": 0.001752458370901877, "clip_ratio/low_min": 0.0002350144823139999, "clip_ratio/region_mean": 0.0030514284590026364, "epoch": 0.103039742400644, "grad_norm": 156.73641967773438, "learning_rate": 1e-06, "loss": -0.0042, "step": 1104 }, { "clip_ratio/high_max": 0.0029413750671665184, "clip_ratio/high_mean": 0.0011398695169191342, "clip_ratio/low_mean": 0.001502103445091052, "clip_ratio/low_min": 0.00013782737732981332, "clip_ratio/region_mean": 0.0026419729547342286, "epoch": 0.10313307550064459, "grad_norm": 0.37896502017974854, "learning_rate": 1e-06, "loss": 0.0192, "step": 1105 }, { "clip_ratio/high_max": 0.0026544243446551263, "clip_ratio/high_mean": 0.001177929876575945, "clip_ratio/low_mean": 0.0014402927045011893, "clip_ratio/low_min": 0.00012283551404834725, "clip_ratio/region_mean": 0.0026182225919910707, "epoch": 0.10322640860064516, "grad_norm": 1.1917195320129395, "learning_rate": 1e-06, "loss": 0.0251, "step": 1106 }, { "clip_ratio/high_max": 0.002472171137924306, "clip_ratio/high_mean": 0.0010606614087009802, "clip_ratio/low_mean": 0.0015006561370682903, "clip_ratio/low_min": 4.6068795199971646e-05, "clip_ratio/region_mean": 0.0025613175239413977, "epoch": 0.10331974170064574, "grad_norm": 0.8193889856338501, "learning_rate": 1e-06, "loss": -0.0188, "step": 1107 }, { "clip_ratio/high_max": 0.002694274371606298, "clip_ratio/high_mean": 0.0012016507316729985, "clip_ratio/low_mean": 0.0014590852879337035, "clip_ratio/low_min": 7.493342491216026e-05, "clip_ratio/region_mean": 0.0026607360268826596, "epoch": 0.10341307480064633, "grad_norm": 0.11526229232549667, "learning_rate": 1e-06, "loss": 0.0302, "step": 1108 }, { "clip_ratio/high_max": 0.0029282200121087953, "clip_ratio/high_mean": 0.0011730564910976682, "clip_ratio/low_mean": 0.0015058741664688569, "clip_ratio/low_min": 0.0002026395704888273, "clip_ratio/region_mean": 0.0026789306502905674, "epoch": 0.10350640790064691, "grad_norm": 1.113185167312622, "learning_rate": 1e-06, "loss": 0.0095, "step": 1109 }, { "clip_ratio/high_max": 0.0026138090615859255, "clip_ratio/high_mean": 0.001089922470782767, "clip_ratio/low_mean": 0.001945942571182968, "clip_ratio/low_min": 0.00017403253968950594, "clip_ratio/region_mean": 0.0030358650838024914, "epoch": 0.1035997410006475, "grad_norm": 0.13811784982681274, "learning_rate": 1e-06, "loss": 0.032, "step": 1110 }, { "clip_ratio/high_max": 0.002650943184562493, "clip_ratio/high_mean": 0.0010927115199592663, "clip_ratio/low_mean": 0.0015771086000313517, "clip_ratio/low_min": 3.997352905571461e-05, "clip_ratio/region_mean": 0.0026698200963437557, "epoch": 0.10369307410064808, "grad_norm": 0.2351246327161789, "learning_rate": 1e-06, "loss": 0.0447, "step": 1111 }, { "clip_ratio/high_max": 0.002422716272121761, "clip_ratio/high_mean": 0.0011060118122259155, "clip_ratio/low_mean": 0.0014811184373684227, "clip_ratio/low_min": 0.00022183147939358605, "clip_ratio/region_mean": 0.0025871302641462535, "epoch": 0.10378640720064866, "grad_norm": 0.12535996735095978, "learning_rate": 1e-06, "loss": 0.0197, "step": 1112 }, { "clip_ratio/high_max": 0.0023927312504383735, "clip_ratio/high_mean": 0.0010091246222145855, "clip_ratio/low_mean": 0.0014695629761263262, "clip_ratio/low_min": 0.0001592255212017335, "clip_ratio/region_mean": 0.0024786875874269754, "epoch": 0.10387974030064925, "grad_norm": 0.12993058562278748, "learning_rate": 1e-06, "loss": 0.0345, "step": 1113 }, { "clip_ratio/high_max": 0.0025049443138414063, "clip_ratio/high_mean": 0.0010873453593376325, "clip_ratio/low_mean": 0.0014161132276058197, "clip_ratio/low_min": 0.00022265576717472868, "clip_ratio/region_mean": 0.0025034585560206324, "epoch": 0.10397307340064983, "grad_norm": 198.1874542236328, "learning_rate": 1e-06, "loss": 0.1569, "step": 1114 }, { "clip_ratio/high_max": 0.0025274532235926017, "clip_ratio/high_mean": 0.0010891232195717748, "clip_ratio/low_mean": 0.001649478330364218, "clip_ratio/low_min": 8.891508332453668e-05, "clip_ratio/region_mean": 0.0027386015790398233, "epoch": 0.10406640650065041, "grad_norm": 2.943773031234741, "learning_rate": 1e-06, "loss": 0.0281, "step": 1115 }, { "clip_ratio/high_max": 0.0029280090529937297, "clip_ratio/high_mean": 0.0011059882654080866, "clip_ratio/low_mean": 0.0012266908634046558, "clip_ratio/low_min": 4.638930386136053e-05, "clip_ratio/region_mean": 0.002332679068786092, "epoch": 0.104159739600651, "grad_norm": 0.1100318655371666, "learning_rate": 1e-06, "loss": -0.0026, "step": 1116 }, { "clip_ratio/high_max": 0.0023475680936826393, "clip_ratio/high_mean": 0.0009771306649781764, "clip_ratio/low_mean": 0.0017699179370538332, "clip_ratio/low_min": 0.0002464298340782989, "clip_ratio/region_mean": 0.002747048609307967, "epoch": 0.10425307270065158, "grad_norm": 0.38106921315193176, "learning_rate": 1e-06, "loss": 0.0599, "step": 1117 }, { "clip_ratio/high_max": 0.002319193896255456, "clip_ratio/high_mean": 0.000981407620201935, "clip_ratio/low_mean": 0.0013981730035084183, "clip_ratio/low_min": 5.6276252507814206e-05, "clip_ratio/region_mean": 0.002379580633714795, "epoch": 0.10434640580065216, "grad_norm": 0.1079186275601387, "learning_rate": 1e-06, "loss": 0.0076, "step": 1118 }, { "clip_ratio/high_max": 0.0022938608599361032, "clip_ratio/high_mean": 0.0010277015317115001, "clip_ratio/low_mean": 0.0014022658688190859, "clip_ratio/low_min": 0.00016592146130278707, "clip_ratio/region_mean": 0.002429967345960904, "epoch": 0.10443973890065275, "grad_norm": 0.1319061815738678, "learning_rate": 1e-06, "loss": -0.0142, "step": 1119 }, { "clip_ratio/high_max": 0.00252931920113042, "clip_ratio/high_mean": 0.001000481710434542, "clip_ratio/low_mean": 0.0015538172083324753, "clip_ratio/low_min": 7.660049413971137e-05, "clip_ratio/region_mean": 0.0025542989606037736, "epoch": 0.10453307200065333, "grad_norm": 0.19144785404205322, "learning_rate": 1e-06, "loss": 0.034, "step": 1120 }, { "clip_ratio/high_max": 0.0025256265507778153, "clip_ratio/high_mean": 0.0011509384930832312, "clip_ratio/low_mean": 0.0012117600381316151, "clip_ratio/low_min": 0.00010051626668428071, "clip_ratio/region_mean": 0.0023626985857845284, "epoch": 0.10462640510065392, "grad_norm": 0.10469915717840195, "learning_rate": 1e-06, "loss": -0.0294, "step": 1121 }, { "clip_ratio/high_max": 0.0025710243280627765, "clip_ratio/high_mean": 0.0011006846179952845, "clip_ratio/low_mean": 0.001546981613500975, "clip_ratio/low_min": 0.00013654349822900258, "clip_ratio/region_mean": 0.002647666187840514, "epoch": 0.1047197382006545, "grad_norm": 20701.240234375, "learning_rate": 1e-06, "loss": 8.8434, "step": 1122 }, { "clip_ratio/high_max": 0.0025527682519168593, "clip_ratio/high_mean": 0.0011990619896096177, "clip_ratio/low_mean": 0.0012008287139906315, "clip_ratio/low_min": 5.96671643506852e-05, "clip_ratio/region_mean": 0.0023998907054192387, "epoch": 0.10481307130065508, "grad_norm": 0.1038522720336914, "learning_rate": 1e-06, "loss": -0.0236, "step": 1123 }, { "clip_ratio/high_max": 0.002656278600625228, "clip_ratio/high_mean": 0.0011056520343117882, "clip_ratio/low_mean": 0.0015240982902469113, "clip_ratio/low_min": 0.00014422373169509228, "clip_ratio/region_mean": 0.002629750299092848, "epoch": 0.10490640440065567, "grad_norm": 0.19483205676078796, "learning_rate": 1e-06, "loss": 0.049, "step": 1124 }, { "clip_ratio/high_max": 0.0022810882874182425, "clip_ratio/high_mean": 0.000966720861470094, "clip_ratio/low_mean": 0.001735957139317179, "clip_ratio/low_min": 4.5520849198510405e-05, "clip_ratio/region_mean": 0.0027026780444430187, "epoch": 0.10499973750065625, "grad_norm": 0.32446226477622986, "learning_rate": 1e-06, "loss": 0.0434, "step": 1125 }, { "clip_ratio/high_max": 0.0030878846446285024, "clip_ratio/high_mean": 0.001148663121057325, "clip_ratio/low_mean": 0.001595439069205895, "clip_ratio/low_min": 1.989378324651625e-05, "clip_ratio/region_mean": 0.0027441021884442307, "epoch": 0.10509307060065683, "grad_norm": 5.806354999542236, "learning_rate": 1e-06, "loss": 0.0267, "step": 1126 }, { "clip_ratio/high_max": 0.0023994951916392893, "clip_ratio/high_mean": 0.0010409827846160624, "clip_ratio/low_mean": 0.0014425911049329443, "clip_ratio/low_min": 0.00021041771924501518, "clip_ratio/region_mean": 0.002483573873178102, "epoch": 0.10518640370065742, "grad_norm": 0.12018430233001709, "learning_rate": 1e-06, "loss": 0.0338, "step": 1127 }, { "clip_ratio/high_max": 0.002376298616582062, "clip_ratio/high_mean": 0.001055398657626938, "clip_ratio/low_mean": 0.0015098216863407288, "clip_ratio/low_min": 7.986284163052915e-05, "clip_ratio/region_mean": 0.0025652203112258576, "epoch": 0.105279736800658, "grad_norm": 0.21106520295143127, "learning_rate": 1e-06, "loss": 0.0281, "step": 1128 }, { "clip_ratio/high_max": 0.0025821992894634604, "clip_ratio/high_mean": 0.0010473803413333371, "clip_ratio/low_mean": 0.00155523468129104, "clip_ratio/low_min": 0.00015527075447607785, "clip_ratio/region_mean": 0.002602615022624377, "epoch": 0.10537306990065858, "grad_norm": 27.19087028503418, "learning_rate": 1e-06, "loss": 0.0173, "step": 1129 }, { "clip_ratio/high_max": 0.002762049283774104, "clip_ratio/high_mean": 0.0010838034431799315, "clip_ratio/low_mean": 0.0013598512650787598, "clip_ratio/low_min": 0.00014690825628349558, "clip_ratio/region_mean": 0.002443654702801723, "epoch": 0.10546640300065917, "grad_norm": 1.0987893342971802, "learning_rate": 1e-06, "loss": 0.0023, "step": 1130 }, { "clip_ratio/high_max": 0.002577096442109905, "clip_ratio/high_mean": 0.0010172219954256434, "clip_ratio/low_mean": 0.001542750764201628, "clip_ratio/low_min": 4.602594708558172e-05, "clip_ratio/region_mean": 0.0025599726795917377, "epoch": 0.10555973610065975, "grad_norm": 0.1445363610982895, "learning_rate": 1e-06, "loss": 0.057, "step": 1131 }, { "clip_ratio/high_max": 0.002608786140626762, "clip_ratio/high_mean": 0.0011486086150398478, "clip_ratio/low_mean": 0.0013332708876987454, "clip_ratio/low_min": 7.936724432511255e-05, "clip_ratio/region_mean": 0.0024818794991006143, "epoch": 0.10565306920066034, "grad_norm": 0.12205726653337479, "learning_rate": 1e-06, "loss": 0.0062, "step": 1132 }, { "clip_ratio/high_max": 0.0024981431488413364, "clip_ratio/high_mean": 0.0011118799484393094, "clip_ratio/low_mean": 0.0017443321048631333, "clip_ratio/low_min": 0.00023727012739982456, "clip_ratio/region_mean": 0.0028562120787682943, "epoch": 0.10574640230066092, "grad_norm": 0.11857272684574127, "learning_rate": 1e-06, "loss": 0.0321, "step": 1133 }, { "clip_ratio/high_max": 0.00250393919122871, "clip_ratio/high_mean": 0.0011594168172450736, "clip_ratio/low_mean": 0.0015158178721321747, "clip_ratio/low_min": 0.00015182946481218096, "clip_ratio/region_mean": 0.002675234754860867, "epoch": 0.1058397354006615, "grad_norm": 0.10825088620185852, "learning_rate": 1e-06, "loss": 0.0004, "step": 1134 }, { "clip_ratio/high_max": 0.0023394157979055308, "clip_ratio/high_mean": 0.0010343670437578112, "clip_ratio/low_mean": 0.0016300256866088603, "clip_ratio/low_min": 0.00015745593918836676, "clip_ratio/region_mean": 0.002664392741280608, "epoch": 0.10593306850066209, "grad_norm": 0.12784479558467865, "learning_rate": 1e-06, "loss": 0.0381, "step": 1135 }, { "clip_ratio/high_max": 0.0024102604875224642, "clip_ratio/high_mean": 0.0010591411883069668, "clip_ratio/low_mean": 0.0014244805825001094, "clip_ratio/low_min": 6.143748396425508e-05, "clip_ratio/region_mean": 0.002483621734427288, "epoch": 0.10602640160066266, "grad_norm": 22.513933181762695, "learning_rate": 1e-06, "loss": 784.9518, "step": 1136 }, { "clip_ratio/high_max": 0.0025080766790779307, "clip_ratio/high_mean": 0.001036096400639508, "clip_ratio/low_mean": 0.0016372616446460597, "clip_ratio/low_min": 0.00010924560046987608, "clip_ratio/region_mean": 0.0026733580671134405, "epoch": 0.10611973470066324, "grad_norm": 0.20479612052440643, "learning_rate": 1e-06, "loss": 0.0356, "step": 1137 }, { "clip_ratio/high_max": 0.002433931964333169, "clip_ratio/high_mean": 0.001040183586155763, "clip_ratio/low_mean": 0.001634392701816978, "clip_ratio/low_min": 0.00013433927233563736, "clip_ratio/region_mean": 0.0026745763098006137, "epoch": 0.10621306780066384, "grad_norm": 0.6353483200073242, "learning_rate": 1e-06, "loss": 0.0113, "step": 1138 }, { "clip_ratio/high_max": 0.002414541973848827, "clip_ratio/high_mean": 0.0010201572677033255, "clip_ratio/low_mean": 0.0016940726636676118, "clip_ratio/low_min": 0.00011109549177490408, "clip_ratio/region_mean": 0.002714229980483651, "epoch": 0.10630640090066441, "grad_norm": 0.12792839109897614, "learning_rate": 1e-06, "loss": 0.026, "step": 1139 }, { "clip_ratio/high_max": 0.0025791230946197174, "clip_ratio/high_mean": 0.0011314996918372344, "clip_ratio/low_mean": 0.0016081087960628793, "clip_ratio/low_min": 0.0001330619943473721, "clip_ratio/region_mean": 0.0027396084915380925, "epoch": 0.10639973400066499, "grad_norm": 0.12060131132602692, "learning_rate": 1e-06, "loss": 0.0106, "step": 1140 }, { "clip_ratio/high_max": 0.00268881548254285, "clip_ratio/high_mean": 0.0010181948255194584, "clip_ratio/low_mean": 0.0015097438590601087, "clip_ratio/low_min": 0.00018763658954412676, "clip_ratio/region_mean": 0.0025279386900365353, "epoch": 0.10649306710066558, "grad_norm": 0.12730512022972107, "learning_rate": 1e-06, "loss": 0.0413, "step": 1141 }, { "clip_ratio/high_max": 0.002521760769013781, "clip_ratio/high_mean": 0.0009629589967516949, "clip_ratio/low_mean": 0.00130176794664294, "clip_ratio/low_min": 0.00010137976823898498, "clip_ratio/region_mean": 0.0022647269724984653, "epoch": 0.10658640020066616, "grad_norm": 0.11136295646429062, "learning_rate": 1e-06, "loss": 0.0203, "step": 1142 }, { "clip_ratio/high_max": 0.0024047069709922653, "clip_ratio/high_mean": 0.0010565934135229327, "clip_ratio/low_mean": 0.0015082901481946465, "clip_ratio/low_min": 0.0001308686623815447, "clip_ratio/region_mean": 0.0025648835508036427, "epoch": 0.10667973330066675, "grad_norm": 0.8497651219367981, "learning_rate": 1e-06, "loss": 0.0421, "step": 1143 }, { "clip_ratio/high_max": 0.0023329517789534293, "clip_ratio/high_mean": 0.001110437424358679, "clip_ratio/low_mean": 0.0014975852136558387, "clip_ratio/low_min": 0.0001335406723228516, "clip_ratio/region_mean": 0.0026080226962221786, "epoch": 0.10677306640066733, "grad_norm": 1.4519132375717163, "learning_rate": 1e-06, "loss": 0.0187, "step": 1144 }, { "clip_ratio/high_max": 0.0023515520224464126, "clip_ratio/high_mean": 0.0010293052000633907, "clip_ratio/low_mean": 0.0014958283209125511, "clip_ratio/low_min": 9.770324322744273e-05, "clip_ratio/region_mean": 0.0025251335100620054, "epoch": 0.10686639950066791, "grad_norm": 0.8970760703086853, "learning_rate": 1e-06, "loss": 0.0071, "step": 1145 }, { "clip_ratio/high_max": 0.002454465240589343, "clip_ratio/high_mean": 0.0010758003845694475, "clip_ratio/low_mean": 0.0013590464859589702, "clip_ratio/low_min": 0.00013524835230782628, "clip_ratio/region_mean": 0.0024348468868993223, "epoch": 0.1069597326006685, "grad_norm": 0.12648969888687134, "learning_rate": 1e-06, "loss": 0.0148, "step": 1146 }, { "clip_ratio/high_max": 0.0028738573819282465, "clip_ratio/high_mean": 0.0012112499316572212, "clip_ratio/low_mean": 0.0015169698745012283, "clip_ratio/low_min": 0.00018402168188913492, "clip_ratio/region_mean": 0.002728219798882492, "epoch": 0.10705306570066908, "grad_norm": 0.1184571161866188, "learning_rate": 1e-06, "loss": -0.0386, "step": 1147 }, { "clip_ratio/high_max": 0.0023294651982723735, "clip_ratio/high_mean": 0.0010594530995149398, "clip_ratio/low_mean": 0.0014024254342075437, "clip_ratio/low_min": 9.011303518491331e-05, "clip_ratio/region_mean": 0.0024618785027996637, "epoch": 0.10714639880066966, "grad_norm": 0.12048061192035675, "learning_rate": 1e-06, "loss": 0.0131, "step": 1148 }, { "clip_ratio/high_max": 0.0027656443417072296, "clip_ratio/high_mean": 0.0011772946272685658, "clip_ratio/low_mean": 0.0014784574959776364, "clip_ratio/low_min": 0.00010673908491298789, "clip_ratio/region_mean": 0.002655752112332266, "epoch": 0.10723973190067025, "grad_norm": 0.830747127532959, "learning_rate": 1e-06, "loss": 0.0089, "step": 1149 }, { "clip_ratio/high_max": 0.0026511835676501505, "clip_ratio/high_mean": 0.0011788259653258137, "clip_ratio/low_mean": 0.0014782018515688833, "clip_ratio/low_min": 0.00012411908028298058, "clip_ratio/region_mean": 0.0026570278423605487, "epoch": 0.10733306500067083, "grad_norm": 0.1409129649400711, "learning_rate": 1e-06, "loss": 0.0074, "step": 1150 }, { "clip_ratio/high_max": 0.002441094140522182, "clip_ratio/high_mean": 0.000984349702775944, "clip_ratio/low_mean": 0.0016933773549681064, "clip_ratio/low_min": 0.00011370659649401205, "clip_ratio/region_mean": 0.002677727010450326, "epoch": 0.10742639810067142, "grad_norm": 0.2805463373661041, "learning_rate": 1e-06, "loss": 0.0283, "step": 1151 }, { "clip_ratio/high_max": 0.0022611791573581286, "clip_ratio/high_mean": 0.0009394842272740789, "clip_ratio/low_mean": 0.0015625487103534397, "clip_ratio/low_min": 0.00027780121035902994, "clip_ratio/region_mean": 0.0025020329558174126, "epoch": 0.107519731200672, "grad_norm": 0.14808697998523712, "learning_rate": 1e-06, "loss": 0.0403, "step": 1152 }, { "clip_ratio/high_max": 0.0021056226978544146, "clip_ratio/high_mean": 0.0009584882609487977, "clip_ratio/low_mean": 0.0013512465993699152, "clip_ratio/low_min": 0.00017055282023648033, "clip_ratio/region_mean": 0.002309734816662967, "epoch": 0.10761306430067258, "grad_norm": 5.437313079833984, "learning_rate": 1e-06, "loss": 0.0179, "step": 1153 }, { "clip_ratio/high_max": 0.0022523833831655793, "clip_ratio/high_mean": 0.0009239434857590823, "clip_ratio/low_mean": 0.001556642342620762, "clip_ratio/low_min": 9.151407357421704e-05, "clip_ratio/region_mean": 0.002480585826560855, "epoch": 0.10770639740067317, "grad_norm": 14.293571472167969, "learning_rate": 1e-06, "loss": 0.0196, "step": 1154 }, { "clip_ratio/high_max": 0.002250166449812241, "clip_ratio/high_mean": 0.0009183474248857237, "clip_ratio/low_mean": 0.0015268499701051041, "clip_ratio/low_min": 0.00010505731370358262, "clip_ratio/region_mean": 0.0024451974168187007, "epoch": 0.10779973050067375, "grad_norm": 0.14646099507808685, "learning_rate": 1e-06, "loss": 0.0621, "step": 1155 }, { "clip_ratio/high_max": 0.002269261323817773, "clip_ratio/high_mean": 0.0009304488157795276, "clip_ratio/low_mean": 0.0015893066047283355, "clip_ratio/low_min": 8.157099182426464e-05, "clip_ratio/region_mean": 0.0025197554277838208, "epoch": 0.10789306360067433, "grad_norm": 0.13117019832134247, "learning_rate": 1e-06, "loss": 0.0334, "step": 1156 }, { "clip_ratio/high_max": 0.0029169384433771484, "clip_ratio/high_mean": 0.001134302481659688, "clip_ratio/low_mean": 0.0015523278343607672, "clip_ratio/low_min": 4.92832205054583e-05, "clip_ratio/region_mean": 0.002686630265088752, "epoch": 0.10798639670067492, "grad_norm": 0.12029363214969635, "learning_rate": 1e-06, "loss": 0.0229, "step": 1157 }, { "clip_ratio/high_max": 0.0027234578265051823, "clip_ratio/high_mean": 0.0011646097063930938, "clip_ratio/low_mean": 0.001637611749174539, "clip_ratio/low_min": 0.00013192225014790893, "clip_ratio/region_mean": 0.002802221497404389, "epoch": 0.1080797298006755, "grad_norm": 0.11221719533205032, "learning_rate": 1e-06, "loss": 0.0139, "step": 1158 }, { "clip_ratio/high_max": 0.002248412412882317, "clip_ratio/high_mean": 0.0009550651757308515, "clip_ratio/low_mean": 0.001644369007408386, "clip_ratio/low_min": 0.00015320246711780783, "clip_ratio/region_mean": 0.002599434163130354, "epoch": 0.10817306290067608, "grad_norm": 0.14077673852443695, "learning_rate": 1e-06, "loss": 0.0219, "step": 1159 }, { "clip_ratio/high_max": 0.0023488279548473656, "clip_ratio/high_mean": 0.0010281364166075946, "clip_ratio/low_mean": 0.001378965473122662, "clip_ratio/low_min": 0.00018107322830474004, "clip_ratio/region_mean": 0.0024071018997346982, "epoch": 0.10826639600067667, "grad_norm": 0.1234094649553299, "learning_rate": 1e-06, "loss": 0.0215, "step": 1160 }, { "clip_ratio/high_max": 0.0029186873071012087, "clip_ratio/high_mean": 0.0013197747648518998, "clip_ratio/low_mean": 0.001438031855286681, "clip_ratio/low_min": 4.919927232549526e-05, "clip_ratio/region_mean": 0.002757806665613316, "epoch": 0.10835972910067725, "grad_norm": 8.374993324279785, "learning_rate": 1e-06, "loss": 0.0106, "step": 1161 }, { "clip_ratio/high_max": 0.0026132753409910947, "clip_ratio/high_mean": 0.0011815812140412163, "clip_ratio/low_mean": 0.0014921426809451077, "clip_ratio/low_min": 8.586989042669302e-05, "clip_ratio/region_mean": 0.002673723880434409, "epoch": 0.10845306220067784, "grad_norm": 0.22813093662261963, "learning_rate": 1e-06, "loss": -0.0217, "step": 1162 }, { "clip_ratio/high_max": 0.0024783123517408967, "clip_ratio/high_mean": 0.0010287514778610785, "clip_ratio/low_mean": 0.0017533973550598603, "clip_ratio/low_min": 0.00018909599839389557, "clip_ratio/region_mean": 0.002782148789265193, "epoch": 0.10854639530067842, "grad_norm": 0.1336006224155426, "learning_rate": 1e-06, "loss": 0.0153, "step": 1163 }, { "clip_ratio/high_max": 0.002699063756153919, "clip_ratio/high_mean": 0.0011499940046633128, "clip_ratio/low_mean": 0.001743998502206523, "clip_ratio/low_min": 0.00022184449335327372, "clip_ratio/region_mean": 0.0028939924886799417, "epoch": 0.108639728400679, "grad_norm": 0.674048125743866, "learning_rate": 1e-06, "loss": 0.0541, "step": 1164 }, { "clip_ratio/high_max": 0.002533651153498795, "clip_ratio/high_mean": 0.0010122582461917773, "clip_ratio/low_mean": 0.0017590508541616146, "clip_ratio/low_min": 6.615089478145819e-05, "clip_ratio/region_mean": 0.0027713090894394554, "epoch": 0.10873306150067959, "grad_norm": 0.12195254862308502, "learning_rate": 1e-06, "loss": 0.0283, "step": 1165 }, { "clip_ratio/high_max": 0.0024692603037692606, "clip_ratio/high_mean": 0.0010916392639046535, "clip_ratio/low_mean": 0.0018298918912478257, "clip_ratio/low_min": 9.400539602211211e-05, "clip_ratio/region_mean": 0.002921531136962585, "epoch": 0.10882639460068017, "grad_norm": 2.757406234741211, "learning_rate": 1e-06, "loss": 0.0236, "step": 1166 }, { "clip_ratio/high_max": 0.0023226002595038153, "clip_ratio/high_mean": 0.0009791282209334895, "clip_ratio/low_mean": 0.0015479675093956757, "clip_ratio/low_min": 0.00011426099536038237, "clip_ratio/region_mean": 0.0025270956903113984, "epoch": 0.10891972770068074, "grad_norm": 0.11199580132961273, "learning_rate": 1e-06, "loss": 0.0307, "step": 1167 }, { "clip_ratio/high_max": 0.002611228861496784, "clip_ratio/high_mean": 0.0011405101722630206, "clip_ratio/low_mean": 0.0015856686950428411, "clip_ratio/low_min": 0.00012575715663842857, "clip_ratio/region_mean": 0.0027261788709438406, "epoch": 0.10901306080068134, "grad_norm": 0.11596285551786423, "learning_rate": 1e-06, "loss": 0.0287, "step": 1168 }, { "clip_ratio/high_max": 0.0023346685338765383, "clip_ratio/high_mean": 0.0009945332367351511, "clip_ratio/low_mean": 0.001598068334715208, "clip_ratio/low_min": 0.00036743910823133774, "clip_ratio/region_mean": 0.002592601544165518, "epoch": 0.10910639390068191, "grad_norm": 0.12039542943239212, "learning_rate": 1e-06, "loss": 0.0317, "step": 1169 }, { "clip_ratio/high_max": 0.0026123062198166735, "clip_ratio/high_mean": 0.0010124552572960965, "clip_ratio/low_mean": 0.0018334796695853584, "clip_ratio/low_min": 0.00023767389211570844, "clip_ratio/region_mean": 0.0028459349341574125, "epoch": 0.10919972700068249, "grad_norm": 0.19903850555419922, "learning_rate": 1e-06, "loss": 0.0603, "step": 1170 }, { "clip_ratio/high_max": 0.0026690758240874857, "clip_ratio/high_mean": 0.0011615412622631993, "clip_ratio/low_mean": 0.0014739277758053504, "clip_ratio/low_min": 4.657471799873747e-05, "clip_ratio/region_mean": 0.0026354690635344014, "epoch": 0.10929306010068308, "grad_norm": 0.11112910509109497, "learning_rate": 1e-06, "loss": 0.02, "step": 1171 }, { "clip_ratio/high_max": 0.002784635333227925, "clip_ratio/high_mean": 0.0011873923722305335, "clip_ratio/low_mean": 0.001487686295149615, "clip_ratio/low_min": 0.00019459550912870327, "clip_ratio/region_mean": 0.0026750786710181274, "epoch": 0.10938639320068366, "grad_norm": 3.0499300956726074, "learning_rate": 1e-06, "loss": -0.0218, "step": 1172 }, { "clip_ratio/high_max": 0.002443659381242469, "clip_ratio/high_mean": 0.0010049440934380982, "clip_ratio/low_mean": 0.0019322255539009348, "clip_ratio/low_min": 9.919327567331493e-05, "clip_ratio/region_mean": 0.002937169629149139, "epoch": 0.10947972630068425, "grad_norm": 0.2186526507139206, "learning_rate": 1e-06, "loss": 0.0215, "step": 1173 }, { "clip_ratio/high_max": 0.0025458042582613416, "clip_ratio/high_mean": 0.0010951334043056704, "clip_ratio/low_mean": 0.0019436176262388472, "clip_ratio/low_min": 0.00015485615585930645, "clip_ratio/region_mean": 0.003038751005078666, "epoch": 0.10957305940068483, "grad_norm": 0.2321746051311493, "learning_rate": 1e-06, "loss": -0.0063, "step": 1174 }, { "clip_ratio/high_max": 0.0024702243426872883, "clip_ratio/high_mean": 0.0009409493031853344, "clip_ratio/low_mean": 0.0015604922155034728, "clip_ratio/low_min": 0.00010564335389062762, "clip_ratio/region_mean": 0.0025014415368787013, "epoch": 0.10966639250068541, "grad_norm": 0.11201521754264832, "learning_rate": 1e-06, "loss": 0.0323, "step": 1175 }, { "clip_ratio/high_max": 0.002274602258694358, "clip_ratio/high_mean": 0.0009656934380473103, "clip_ratio/low_mean": 0.001773669122485444, "clip_ratio/low_min": 0.0003532913806338911, "clip_ratio/region_mean": 0.0027393625205149874, "epoch": 0.109759725600686, "grad_norm": 0.1147800087928772, "learning_rate": 1e-06, "loss": 0.0193, "step": 1176 }, { "clip_ratio/high_max": 0.002356181350478437, "clip_ratio/high_mean": 0.000986929069767939, "clip_ratio/low_mean": 0.0016318887574016117, "clip_ratio/low_min": 4.377805362310028e-05, "clip_ratio/region_mean": 0.002618817801703699, "epoch": 0.10985305870068658, "grad_norm": 0.17265908420085907, "learning_rate": 1e-06, "loss": 0.0561, "step": 1177 }, { "clip_ratio/high_max": 0.002528626428102143, "clip_ratio/high_mean": 0.0010560396876826417, "clip_ratio/low_mean": 0.0014436405144806486, "clip_ratio/low_min": 9.18391360755777e-05, "clip_ratio/region_mean": 0.002499680223991163, "epoch": 0.10994639180068716, "grad_norm": 247.41246032714844, "learning_rate": 1e-06, "loss": 243.5589, "step": 1178 }, { "clip_ratio/high_max": 0.0023474520749005023, "clip_ratio/high_mean": 0.0010316269836039282, "clip_ratio/low_mean": 0.0014443036252487218, "clip_ratio/low_min": 7.071547952364199e-05, "clip_ratio/region_mean": 0.002475930603395682, "epoch": 0.11003972490068775, "grad_norm": 0.11761941760778427, "learning_rate": 1e-06, "loss": 0.0109, "step": 1179 }, { "clip_ratio/high_max": 0.002321440464584157, "clip_ratio/high_mean": 0.0010045192957477411, "clip_ratio/low_mean": 0.0017134848749265075, "clip_ratio/low_min": 0.00018554481721366756, "clip_ratio/region_mean": 0.002718004208873026, "epoch": 0.11013305800068833, "grad_norm": 0.9480327367782593, "learning_rate": 1e-06, "loss": 0.0164, "step": 1180 }, { "clip_ratio/high_max": 0.0025491319320281036, "clip_ratio/high_mean": 0.0009353582972835284, "clip_ratio/low_mean": 0.001538360560516594, "clip_ratio/low_min": 0.00011004501175193582, "clip_ratio/region_mean": 0.0024737188578001224, "epoch": 0.11022639110068891, "grad_norm": 0.11550415307283401, "learning_rate": 1e-06, "loss": -0.0058, "step": 1181 }, { "clip_ratio/high_max": 0.0025997889970312826, "clip_ratio/high_mean": 0.0010012016045948258, "clip_ratio/low_mean": 0.00173277840258379, "clip_ratio/low_min": 0.000186779353498423, "clip_ratio/region_mean": 0.0027339800290064886, "epoch": 0.1103197242006895, "grad_norm": 55.790584564208984, "learning_rate": 1e-06, "loss": 0.0362, "step": 1182 }, { "clip_ratio/high_max": 0.002636319322846248, "clip_ratio/high_mean": 0.0010693519607229973, "clip_ratio/low_mean": 0.0015868947812123224, "clip_ratio/low_min": 0.00014547255068464437, "clip_ratio/region_mean": 0.002656246710103005, "epoch": 0.11041305730069008, "grad_norm": 0.11102475225925446, "learning_rate": 1e-06, "loss": 0.0404, "step": 1183 }, { "clip_ratio/high_max": 0.00252573199395556, "clip_ratio/high_mean": 0.0010742717095126864, "clip_ratio/low_mean": 0.001395404771756148, "clip_ratio/low_min": 5.633953605865827e-05, "clip_ratio/region_mean": 0.0024696764958207496, "epoch": 0.11050639040069067, "grad_norm": 0.10893546789884567, "learning_rate": 1e-06, "loss": 0.012, "step": 1184 }, { "clip_ratio/high_max": 0.003032694097782951, "clip_ratio/high_mean": 0.0013811760873068124, "clip_ratio/low_mean": 0.001046356399456272, "clip_ratio/low_min": 4.110179088456789e-05, "clip_ratio/region_mean": 0.002427532417641487, "completions/clipped_ratio": 0.013044084821428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 604.4788818359375, "completions/mean_terminated_length": 558.3333129882812, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.11059972350069125, "grad_norm": 794392068096.0, "learning_rate": 1e-06, "loss": 1065595568128.0, "num_tokens": 893554330.0, "reward": 0.5915091633796692, "reward_std": 0.19118820130825043, "rewards/simpleverify_reward/mean": 0.5915091633796692, "rewards/simpleverify_reward/std": 0.491556853055954, "step": 1185 }, { "clip_ratio/high_max": 0.0032812982390169054, "clip_ratio/high_mean": 0.0013259194674901664, "clip_ratio/low_mean": 0.0010756525280157803, "clip_ratio/low_min": 6.813856543885777e-05, "clip_ratio/region_mean": 0.002401571946393233, "epoch": 0.11069305660069183, "grad_norm": 2032945.0, "learning_rate": 1e-06, "loss": 303.6327, "step": 1186 }, { "clip_ratio/high_max": 0.002685646490135696, "clip_ratio/high_mean": 0.0011909100794582628, "clip_ratio/low_mean": 0.0012029447498207446, "clip_ratio/low_min": 8.945473655330716e-05, "clip_ratio/region_mean": 0.0023938547674333677, "epoch": 0.11078638970069242, "grad_norm": 77396104.0, "learning_rate": 1e-06, "loss": 40551.8242, "step": 1187 }, { "clip_ratio/high_max": 0.0024277561824419536, "clip_ratio/high_mean": 0.0011242178916290868, "clip_ratio/low_mean": 0.0010116304983966984, "clip_ratio/low_min": 5.449965647130739e-05, "clip_ratio/region_mean": 0.0021358483427320607, "epoch": 0.110879722800693, "grad_norm": 6430587.0, "learning_rate": 1e-06, "loss": 8795.7891, "step": 1188 }, { "clip_ratio/high_max": 0.0028360644246276934, "clip_ratio/high_mean": 0.001222999006131431, "clip_ratio/low_mean": 0.0010287557015544735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022517547040479258, "epoch": 0.11097305590069358, "grad_norm": 3894154.0, "learning_rate": 1e-06, "loss": 432.3489, "step": 1189 }, { "clip_ratio/high_max": 0.003117634456430096, "clip_ratio/high_mean": 0.0013883083629480097, "clip_ratio/low_mean": 0.0010426388907944784, "clip_ratio/low_min": 4.074778553331271e-05, "clip_ratio/region_mean": 0.0024309473737957887, "epoch": 0.11106638900069417, "grad_norm": 4270651.5, "learning_rate": 1e-06, "loss": 5669.4448, "step": 1190 }, { "clip_ratio/high_max": 0.0026514838682487607, "clip_ratio/high_mean": 0.0010602464626572328, "clip_ratio/low_mean": 0.001156151502073044, "clip_ratio/low_min": 9.802377280720975e-05, "clip_ratio/region_mean": 0.0022163979811011814, "epoch": 0.11115972210069475, "grad_norm": 2222.460205078125, "learning_rate": 1e-06, "loss": 236.2243, "step": 1191 }, { "clip_ratio/high_max": 0.0029485541163012385, "clip_ratio/high_mean": 0.0012724938824248966, "clip_ratio/low_mean": 0.0013198232336435467, "clip_ratio/low_min": 4.522431117948145e-05, "clip_ratio/region_mean": 0.0025923170760506764, "epoch": 0.11125305520069532, "grad_norm": 327.8789978027344, "learning_rate": 1e-06, "loss": 1.0671, "step": 1192 }, { "clip_ratio/high_max": 0.002309681902261218, "clip_ratio/high_mean": 0.0009305920048063854, "clip_ratio/low_mean": 0.0011323188082315028, "clip_ratio/low_min": 0.00024599146127002314, "clip_ratio/region_mean": 0.002062910840322729, "epoch": 0.11134638830069592, "grad_norm": 217.44329833984375, "learning_rate": 1e-06, "loss": 0.0963, "step": 1193 }, { "clip_ratio/high_max": 0.0024939299255493097, "clip_ratio/high_mean": 0.0011097246351710055, "clip_ratio/low_mean": 0.0010291857288393658, "clip_ratio/low_min": 4.5923356083221734e-05, "clip_ratio/region_mean": 0.002138910313078668, "epoch": 0.1114397214006965, "grad_norm": 800.3222045898438, "learning_rate": 1e-06, "loss": 0.2334, "step": 1194 }, { "clip_ratio/high_max": 0.0032204855378950015, "clip_ratio/high_mean": 0.0013059646298643202, "clip_ratio/low_mean": 0.001289039839321049, "clip_ratio/low_min": 5.4787949920864776e-05, "clip_ratio/region_mean": 0.002595004450995475, "epoch": 0.11153305450069709, "grad_norm": 2.8990097045898438, "learning_rate": 1e-06, "loss": 0.0561, "step": 1195 }, { "clip_ratio/high_max": 0.0029867501434637234, "clip_ratio/high_mean": 0.0013275199889903888, "clip_ratio/low_mean": 0.0010883847389777657, "clip_ratio/low_min": 6.401405698852614e-05, "clip_ratio/region_mean": 0.0024159047607099637, "epoch": 0.11162638760069767, "grad_norm": 1731.1109619140625, "learning_rate": 1e-06, "loss": 1063.0155, "step": 1196 }, { "clip_ratio/high_max": 0.0028330145578365773, "clip_ratio/high_mean": 0.001162853674031794, "clip_ratio/low_mean": 0.0012446948276192416, "clip_ratio/low_min": 7.604400161653757e-05, "clip_ratio/region_mean": 0.0024075484689092264, "epoch": 0.11171972070069824, "grad_norm": 601.0494995117188, "learning_rate": 1e-06, "loss": 0.0909, "step": 1197 }, { "clip_ratio/high_max": 0.002584543191915145, "clip_ratio/high_mean": 0.0011109060142189264, "clip_ratio/low_mean": 0.001239766108483309, "clip_ratio/low_min": 8.229774539358914e-05, "clip_ratio/region_mean": 0.002350672111788299, "epoch": 0.11181305380069884, "grad_norm": 89585.140625, "learning_rate": 1e-06, "loss": 69.6457, "step": 1198 }, { "clip_ratio/high_max": 0.002952156399260275, "clip_ratio/high_mean": 0.0012270632905710954, "clip_ratio/low_mean": 0.0013625667270389386, "clip_ratio/low_min": 0.00014371296310855541, "clip_ratio/region_mean": 0.0025896300285239704, "epoch": 0.11190638690069941, "grad_norm": 2.4704442024230957, "learning_rate": 1e-06, "loss": 0.0441, "step": 1199 }, { "clip_ratio/high_max": 0.0029167511602281593, "clip_ratio/high_mean": 0.0012493978829297703, "clip_ratio/low_mean": 0.001405919494573027, "clip_ratio/low_min": 7.225741501315497e-05, "clip_ratio/region_mean": 0.002655317359312903, "epoch": 0.11199972000069999, "grad_norm": 580.558349609375, "learning_rate": 1e-06, "loss": 0.1047, "step": 1200 }, { "clip_ratio/high_max": 0.0026779835316119716, "clip_ratio/high_mean": 0.001147859096818138, "clip_ratio/low_mean": 0.001546420418890193, "clip_ratio/low_min": 0.00015940708726702724, "clip_ratio/region_mean": 0.0026942794647766277, "epoch": 0.11209305310070058, "grad_norm": 9.425857543945312, "learning_rate": 1e-06, "loss": 0.1292, "step": 1201 }, { "clip_ratio/high_max": 0.003031703694432508, "clip_ratio/high_mean": 0.0013717358815483749, "clip_ratio/low_mean": 0.001517954693554202, "clip_ratio/low_min": 0.00013567385030910373, "clip_ratio/region_mean": 0.0028896905860165134, "epoch": 0.11218638620070116, "grad_norm": 104.549072265625, "learning_rate": 1e-06, "loss": 0.0702, "step": 1202 }, { "clip_ratio/high_max": 0.003224461841455195, "clip_ratio/high_mean": 0.0012773208945873193, "clip_ratio/low_mean": 0.0015749740377941635, "clip_ratio/low_min": 0.00017195318105223123, "clip_ratio/region_mean": 0.0028522949141915888, "epoch": 0.11227971930070174, "grad_norm": 1.1381465196609497, "learning_rate": 1e-06, "loss": 0.0165, "step": 1203 }, { "clip_ratio/high_max": 0.0031134786622715183, "clip_ratio/high_mean": 0.0012439658348739613, "clip_ratio/low_mean": 0.0015783280177856795, "clip_ratio/low_min": 6.882951220177347e-05, "clip_ratio/region_mean": 0.0028222939508850686, "epoch": 0.11237305240070233, "grad_norm": 7.47293758392334, "learning_rate": 1e-06, "loss": 0.0573, "step": 1204 }, { "clip_ratio/high_max": 0.0028630380911636166, "clip_ratio/high_mean": 0.0011970900704909582, "clip_ratio/low_mean": 0.0016848786908667535, "clip_ratio/low_min": 5.800292728963541e-05, "clip_ratio/region_mean": 0.0028819687795476057, "epoch": 0.11246638550070291, "grad_norm": 0.29964470863342285, "learning_rate": 1e-06, "loss": 0.0151, "step": 1205 }, { "clip_ratio/high_max": 0.003358416368428152, "clip_ratio/high_mean": 0.0014295887103799032, "clip_ratio/low_mean": 0.0014843653607385932, "clip_ratio/low_min": 0.00015880863793427125, "clip_ratio/region_mean": 0.0029139541293261573, "epoch": 0.1125597186007035, "grad_norm": 0.6256915330886841, "learning_rate": 1e-06, "loss": -0.0033, "step": 1206 }, { "clip_ratio/high_max": 0.002788866746413987, "clip_ratio/high_mean": 0.001101296664273832, "clip_ratio/low_mean": 0.0016295774366881233, "clip_ratio/low_min": 9.108985341299558e-05, "clip_ratio/region_mean": 0.0027308740536682308, "epoch": 0.11265305170070408, "grad_norm": 1.931517243385315, "learning_rate": 1e-06, "loss": 0.0763, "step": 1207 }, { "clip_ratio/high_max": 0.002541311907407362, "clip_ratio/high_mean": 0.0011258821505180094, "clip_ratio/low_mean": 0.0019550233337213285, "clip_ratio/low_min": 0.00018722818276728503, "clip_ratio/region_mean": 0.0030809054005658254, "epoch": 0.11274638480070466, "grad_norm": 2452.421142578125, "learning_rate": 1e-06, "loss": 0.5394, "step": 1208 }, { "clip_ratio/high_max": 0.002967640779388603, "clip_ratio/high_mean": 0.0012073879479430616, "clip_ratio/low_mean": 0.001830215347581543, "clip_ratio/low_min": 0.0001834741042330279, "clip_ratio/region_mean": 0.0030376032955246046, "epoch": 0.11283971790070525, "grad_norm": 6.742492198944092, "learning_rate": 1e-06, "loss": 0.2086, "step": 1209 }, { "clip_ratio/high_max": 0.0027895457387785427, "clip_ratio/high_mean": 0.0011781330922531197, "clip_ratio/low_mean": 0.0018373019047430716, "clip_ratio/low_min": 0.00020010494881717023, "clip_ratio/region_mean": 0.003015434922417626, "epoch": 0.11293305100070583, "grad_norm": 0.23429690301418304, "learning_rate": 1e-06, "loss": 0.0299, "step": 1210 }, { "clip_ratio/high_max": 0.002603128130431287, "clip_ratio/high_mean": 0.0011555631463124882, "clip_ratio/low_mean": 0.0019232652994105592, "clip_ratio/low_min": 0.00020352118372102268, "clip_ratio/region_mean": 0.003078828420257196, "epoch": 0.11302638410070641, "grad_norm": 0.1572112739086151, "learning_rate": 1e-06, "loss": 0.0199, "step": 1211 }, { "clip_ratio/high_max": 0.003294277295935899, "clip_ratio/high_mean": 0.0013924883678555489, "clip_ratio/low_mean": 0.0015115123060240876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0029040006920695305, "epoch": 0.113119717200707, "grad_norm": 4.687248229980469, "learning_rate": 1e-06, "loss": 0.0358, "step": 1212 }, { "clip_ratio/high_max": 0.0028070095140719786, "clip_ratio/high_mean": 0.0012718518046312965, "clip_ratio/low_mean": 0.0018819677607098129, "clip_ratio/low_min": 0.00026105763572559226, "clip_ratio/region_mean": 0.003153819649014622, "epoch": 0.11321305030070758, "grad_norm": 1.0149513483047485, "learning_rate": 1e-06, "loss": 0.0415, "step": 1213 }, { "clip_ratio/high_max": 0.0034492992417654023, "clip_ratio/high_mean": 0.0013732083716604393, "clip_ratio/low_mean": 0.0018147242990380619, "clip_ratio/low_min": 0.00027785704151028767, "clip_ratio/region_mean": 0.003187932656146586, "epoch": 0.11330638340070817, "grad_norm": 8.762857437133789, "learning_rate": 1e-06, "loss": -0.0046, "step": 1214 }, { "clip_ratio/high_max": 0.002777370056719519, "clip_ratio/high_mean": 0.0012619030858331826, "clip_ratio/low_mean": 0.0018029679922619835, "clip_ratio/low_min": 8.60692316564382e-05, "clip_ratio/region_mean": 0.003064871081733145, "epoch": 0.11339971650070875, "grad_norm": 0.15594303607940674, "learning_rate": 1e-06, "loss": -0.0286, "step": 1215 }, { "clip_ratio/high_max": 0.0024864997976692393, "clip_ratio/high_mean": 0.0009758406158653088, "clip_ratio/low_mean": 0.0018266438783030026, "clip_ratio/low_min": 8.593178699811688e-05, "clip_ratio/region_mean": 0.0028024846033076756, "epoch": 0.11349304960070933, "grad_norm": 4.194972991943359, "learning_rate": 1e-06, "loss": 0.0588, "step": 1216 }, { "clip_ratio/high_max": 0.0029669780415133573, "clip_ratio/high_mean": 0.0012851457213400863, "clip_ratio/low_mean": 0.001996494484046707, "clip_ratio/low_min": 0.00012038096610922366, "clip_ratio/region_mean": 0.003281640281784348, "epoch": 0.11358638270070992, "grad_norm": 0.7980758547782898, "learning_rate": 1e-06, "loss": 0.0069, "step": 1217 }, { "clip_ratio/high_max": 0.0027649380899674725, "clip_ratio/high_mean": 0.0011233926106797298, "clip_ratio/low_mean": 0.0016570268744544592, "clip_ratio/low_min": 0.00018395105144008994, "clip_ratio/region_mean": 0.002780419592454564, "epoch": 0.1136797158007105, "grad_norm": 0.1547294557094574, "learning_rate": 1e-06, "loss": 0.0276, "step": 1218 }, { "clip_ratio/high_max": 0.002502340270439163, "clip_ratio/high_mean": 0.0011344733393343631, "clip_ratio/low_mean": 0.0019340486323926598, "clip_ratio/low_min": 0.00020018002032884397, "clip_ratio/region_mean": 0.003068522048124578, "epoch": 0.11377304890071108, "grad_norm": 266490.875, "learning_rate": 1e-06, "loss": 31.2843, "step": 1219 }, { "clip_ratio/high_max": 0.0029546780351665802, "clip_ratio/high_mean": 0.0012561836520035286, "clip_ratio/low_mean": 0.0016470184818899725, "clip_ratio/low_min": 0.000241303585426067, "clip_ratio/region_mean": 0.002903202206653077, "epoch": 0.11386638200071167, "grad_norm": 0.195596843957901, "learning_rate": 1e-06, "loss": -0.0216, "step": 1220 }, { "clip_ratio/high_max": 0.0028919655014760792, "clip_ratio/high_mean": 0.0012267936526768608, "clip_ratio/low_mean": 0.0018526626517996192, "clip_ratio/low_min": 5.674742396877264e-05, "clip_ratio/region_mean": 0.0030794562844675966, "epoch": 0.11395971510071225, "grad_norm": 0.1119137704372406, "learning_rate": 1e-06, "loss": 0.0675, "step": 1221 }, { "clip_ratio/high_max": 0.002733828870987054, "clip_ratio/high_mean": 0.0010845898495972506, "clip_ratio/low_mean": 0.0019417991970840376, "clip_ratio/low_min": 0.00024353000480914488, "clip_ratio/region_mean": 0.0030263890730566345, "epoch": 0.11405304820071283, "grad_norm": 0.6785203218460083, "learning_rate": 1e-06, "loss": 0.0422, "step": 1222 }, { "clip_ratio/high_max": 0.002741071773925796, "clip_ratio/high_mean": 0.0011416804954933468, "clip_ratio/low_mean": 0.001983925401873421, "clip_ratio/low_min": 0.0002812875245581381, "clip_ratio/region_mean": 0.003125605915556662, "epoch": 0.11414638130071342, "grad_norm": 0.32578977942466736, "learning_rate": 1e-06, "loss": 0.0526, "step": 1223 }, { "clip_ratio/high_max": 0.0035384619040996768, "clip_ratio/high_mean": 0.0013636497569677886, "clip_ratio/low_mean": 0.0017791824466257822, "clip_ratio/low_min": 0.0002631363458931446, "clip_ratio/region_mean": 0.003142832181765698, "epoch": 0.114239714400714, "grad_norm": 0.14653603732585907, "learning_rate": 1e-06, "loss": 0.0326, "step": 1224 }, { "clip_ratio/high_max": 0.002709354965190869, "clip_ratio/high_mean": 0.0011962852968281368, "clip_ratio/low_mean": 0.0018557922485342715, "clip_ratio/low_min": 0.00024172243047360098, "clip_ratio/region_mean": 0.003052077525353525, "epoch": 0.11433304750071459, "grad_norm": 1.0941698551177979, "learning_rate": 1e-06, "loss": 5.5556, "step": 1225 }, { "clip_ratio/high_max": 0.0028288704343140125, "clip_ratio/high_mean": 0.0011237307444389444, "clip_ratio/low_mean": 0.002013173470913898, "clip_ratio/low_min": 0.0002691760892048478, "clip_ratio/region_mean": 0.003136904226266779, "epoch": 0.11442638060071517, "grad_norm": 1520.499267578125, "learning_rate": 1e-06, "loss": 1.6141, "step": 1226 }, { "clip_ratio/high_max": 0.002226488009910099, "clip_ratio/high_mean": 0.00095945191969804, "clip_ratio/low_mean": 0.002001902314077597, "clip_ratio/low_min": 0.00026526784131419845, "clip_ratio/region_mean": 0.0029613541773869656, "epoch": 0.11451971370071574, "grad_norm": 3.255891799926758, "learning_rate": 1e-06, "loss": 0.0482, "step": 1227 }, { "clip_ratio/high_max": 0.0031671427495894022, "clip_ratio/high_mean": 0.0014287199155660346, "clip_ratio/low_mean": 0.0016771097471064422, "clip_ratio/low_min": 0.00014281956828199327, "clip_ratio/region_mean": 0.003105829659034498, "epoch": 0.11461304680071634, "grad_norm": 0.11460442841053009, "learning_rate": 1e-06, "loss": -0.0035, "step": 1228 }, { "clip_ratio/high_max": 0.002764810502412729, "clip_ratio/high_mean": 0.0011847493951790966, "clip_ratio/low_mean": 0.0020044864068040624, "clip_ratio/low_min": 0.00020119971668464132, "clip_ratio/region_mean": 0.003189235765603371, "epoch": 0.11470637990071691, "grad_norm": 0.15332002937793732, "learning_rate": 1e-06, "loss": 0.0714, "step": 1229 }, { "clip_ratio/high_max": 0.00315444352600025, "clip_ratio/high_mean": 0.0013098239651299082, "clip_ratio/low_mean": 0.0016207654771278612, "clip_ratio/low_min": 0.00010815789119078545, "clip_ratio/region_mean": 0.002930589391326066, "epoch": 0.1147997130007175, "grad_norm": 0.11037442833185196, "learning_rate": 1e-06, "loss": -0.0023, "step": 1230 }, { "clip_ratio/high_max": 0.003091499711445067, "clip_ratio/high_mean": 0.0013358835076360265, "clip_ratio/low_mean": 0.0020024567857035436, "clip_ratio/low_min": 0.00018932899001811165, "clip_ratio/region_mean": 0.0033383403351763263, "epoch": 0.11489304610071809, "grad_norm": 1.1134250164031982, "learning_rate": 1e-06, "loss": 0.004, "step": 1231 }, { "clip_ratio/high_max": 0.0023337664242717437, "clip_ratio/high_mean": 0.0009820510331337573, "clip_ratio/low_mean": 0.0021878711122553796, "clip_ratio/low_min": 0.0002635634618854965, "clip_ratio/region_mean": 0.0031699221290182322, "epoch": 0.11498637920071866, "grad_norm": 626.478759765625, "learning_rate": 1e-06, "loss": 0.5195, "step": 1232 }, { "clip_ratio/high_max": 0.0027651966956909746, "clip_ratio/high_mean": 0.0011395619476388674, "clip_ratio/low_mean": 0.0018406770886940649, "clip_ratio/low_min": 0.00014595335323974723, "clip_ratio/region_mean": 0.002980238990858197, "epoch": 0.11507971230071924, "grad_norm": 0.1234096959233284, "learning_rate": 1e-06, "loss": 0.0543, "step": 1233 }, { "clip_ratio/high_max": 0.002956455748062581, "clip_ratio/high_mean": 0.0011226865062781144, "clip_ratio/low_mean": 0.0018596818190417252, "clip_ratio/low_min": 0.00014584953714802396, "clip_ratio/region_mean": 0.002982368408993352, "epoch": 0.11517304540071983, "grad_norm": 0.13140106201171875, "learning_rate": 1e-06, "loss": 0.0405, "step": 1234 }, { "clip_ratio/high_max": 0.002669380854058545, "clip_ratio/high_mean": 0.0010818442369782133, "clip_ratio/low_mean": 0.0020418209533090703, "clip_ratio/low_min": 0.0001974732422240777, "clip_ratio/region_mean": 0.003123665228486061, "epoch": 0.11526637850072041, "grad_norm": 0.12612858414649963, "learning_rate": 1e-06, "loss": 0.0645, "step": 1235 }, { "clip_ratio/high_max": 0.003160827051033266, "clip_ratio/high_mean": 0.001297491959121544, "clip_ratio/low_mean": 0.0016278140356007498, "clip_ratio/low_min": 8.797007103567012e-05, "clip_ratio/region_mean": 0.0029253060347400606, "epoch": 0.115359711600721, "grad_norm": 0.17970788478851318, "learning_rate": 1e-06, "loss": -0.0061, "step": 1236 }, { "clip_ratio/high_max": 0.0022971027792664245, "clip_ratio/high_mean": 0.0009947096477844752, "clip_ratio/low_mean": 0.0016389978118240833, "clip_ratio/low_min": 0.00014986932364990935, "clip_ratio/region_mean": 0.002633707510540262, "epoch": 0.11545304470072158, "grad_norm": 0.1073864996433258, "learning_rate": 1e-06, "loss": 0.0153, "step": 1237 }, { "clip_ratio/high_max": 0.002956458651169669, "clip_ratio/high_mean": 0.001118570304242894, "clip_ratio/low_mean": 0.0018358497618464753, "clip_ratio/low_min": 0.00013398986084212083, "clip_ratio/region_mean": 0.0029544200078817084, "epoch": 0.11554637780072216, "grad_norm": 76.84618377685547, "learning_rate": 1e-06, "loss": 0.0255, "step": 1238 }, { "clip_ratio/high_max": 0.0028000598831567913, "clip_ratio/high_mean": 0.001073163228284102, "clip_ratio/low_mean": 0.0020497106452239677, "clip_ratio/low_min": 0.00020179963303235127, "clip_ratio/region_mean": 0.0031228738589561544, "epoch": 0.11563971090072275, "grad_norm": 0.4443994462490082, "learning_rate": 1e-06, "loss": 0.0433, "step": 1239 }, { "clip_ratio/high_max": 0.0021829877223353833, "clip_ratio/high_mean": 0.0009795482510526199, "clip_ratio/low_mean": 0.0017536543673486449, "clip_ratio/low_min": 0.000196994006728346, "clip_ratio/region_mean": 0.002733202614763286, "epoch": 0.11573304400072333, "grad_norm": 0.11739692091941833, "learning_rate": 1e-06, "loss": 0.0385, "step": 1240 }, { "clip_ratio/high_max": 0.0029265650882734917, "clip_ratio/high_mean": 0.0011322862810629886, "clip_ratio/low_mean": 0.001775075688783545, "clip_ratio/low_min": 0.00013829570707457606, "clip_ratio/region_mean": 0.002907361908000894, "epoch": 0.11582637710072391, "grad_norm": 43.0748176574707, "learning_rate": 1e-06, "loss": 0.0339, "step": 1241 }, { "clip_ratio/high_max": 0.0024362038748222403, "clip_ratio/high_mean": 0.0009572764047334203, "clip_ratio/low_mean": 0.0017483118353993632, "clip_ratio/low_min": 8.527196223440114e-05, "clip_ratio/region_mean": 0.0027055882674176246, "epoch": 0.1159197102007245, "grad_norm": 0.29234376549720764, "learning_rate": 1e-06, "loss": 0.1213, "step": 1242 }, { "clip_ratio/high_max": 0.0024204413894040044, "clip_ratio/high_mean": 0.0010568282395979622, "clip_ratio/low_mean": 0.001684581788140349, "clip_ratio/low_min": 0.0003453019216976827, "clip_ratio/region_mean": 0.0027414099968154915, "epoch": 0.11601304330072508, "grad_norm": 92.03839111328125, "learning_rate": 1e-06, "loss": 0.0752, "step": 1243 }, { "clip_ratio/high_max": 0.002403621554549318, "clip_ratio/high_mean": 0.0011329416338412557, "clip_ratio/low_mean": 0.0017711455147946253, "clip_ratio/low_min": 0.00014479933633992914, "clip_ratio/region_mean": 0.0029040871013421565, "epoch": 0.11610637640072566, "grad_norm": 1.886216640472412, "learning_rate": 1e-06, "loss": 0.0626, "step": 1244 }, { "clip_ratio/high_max": 0.002889584626245778, "clip_ratio/high_mean": 0.0011757361171476077, "clip_ratio/low_mean": 0.001774308351741638, "clip_ratio/low_min": 0.00012336359213804826, "clip_ratio/region_mean": 0.002950044443423394, "epoch": 0.11619970950072625, "grad_norm": 0.6351476907730103, "learning_rate": 1e-06, "loss": 0.0153, "step": 1245 }, { "clip_ratio/high_max": 0.002820053523464594, "clip_ratio/high_mean": 0.0011566770117497072, "clip_ratio/low_mean": 0.0019905082444893196, "clip_ratio/low_min": 0.00028400716291798744, "clip_ratio/region_mean": 0.0031471852853428572, "epoch": 0.11629304260072683, "grad_norm": 0.5719003081321716, "learning_rate": 1e-06, "loss": 0.0116, "step": 1246 }, { "clip_ratio/high_max": 0.002817532527842559, "clip_ratio/high_mean": 0.0011545338165888097, "clip_ratio/low_mean": 0.0019023437853320502, "clip_ratio/low_min": 0.00014981811273173662, "clip_ratio/region_mean": 0.0030568776855943725, "epoch": 0.11638637570072742, "grad_norm": 0.11359849572181702, "learning_rate": 1e-06, "loss": 0.0165, "step": 1247 }, { "clip_ratio/high_max": 0.0026225447363685817, "clip_ratio/high_mean": 0.001107781334212632, "clip_ratio/low_mean": 0.001633396495890338, "clip_ratio/low_min": 0.00021244072922854684, "clip_ratio/region_mean": 0.0027411778428358957, "epoch": 0.116479708800728, "grad_norm": 0.11829929053783417, "learning_rate": 1e-06, "loss": 0.0028, "step": 1248 }, { "clip_ratio/high_max": 0.0027807414953713305, "clip_ratio/high_mean": 0.0011598324490478262, "clip_ratio/low_mean": 0.001705436130578164, "clip_ratio/low_min": 0.0001993938221858116, "clip_ratio/region_mean": 0.0028652685723500326, "epoch": 0.11657304190072858, "grad_norm": 0.3348468244075775, "learning_rate": 1e-06, "loss": 0.0175, "step": 1249 }, { "clip_ratio/high_max": 0.002594205812783912, "clip_ratio/high_mean": 0.001149193682067562, "clip_ratio/low_mean": 0.0017035869095707312, "clip_ratio/low_min": 8.415698175667785e-05, "clip_ratio/region_mean": 0.00285278054070659, "epoch": 0.11666637500072917, "grad_norm": 15.92387580871582, "learning_rate": 1e-06, "loss": 0.0123, "step": 1250 }, { "clip_ratio/high_max": 0.0024011996501940303, "clip_ratio/high_mean": 0.0010468473738001194, "clip_ratio/low_mean": 0.0015372876805486158, "clip_ratio/low_min": 0.0001214348712892388, "clip_ratio/region_mean": 0.002584135094366502, "epoch": 0.11675970810072975, "grad_norm": 25.111967086791992, "learning_rate": 1e-06, "loss": 0.0358, "step": 1251 }, { "clip_ratio/high_max": 0.0033022605639416724, "clip_ratio/high_mean": 0.001250336856173817, "clip_ratio/low_mean": 0.0015794538339832798, "clip_ratio/low_min": 5.6344608310610056e-05, "clip_ratio/region_mean": 0.0028297906828811392, "epoch": 0.11685304120073033, "grad_norm": 0.4591279625892639, "learning_rate": 1e-06, "loss": -0.0271, "step": 1252 }, { "clip_ratio/high_max": 0.0020914003944199067, "clip_ratio/high_mean": 0.0009098018454096746, "clip_ratio/low_mean": 0.0019995495676994324, "clip_ratio/low_min": 0.00031711560859548626, "clip_ratio/region_mean": 0.0029093513876432553, "epoch": 0.11694637430073092, "grad_norm": 5.456788539886475, "learning_rate": 1e-06, "loss": 0.0775, "step": 1253 }, { "clip_ratio/high_max": 0.002621678475406952, "clip_ratio/high_mean": 0.0011481400797492824, "clip_ratio/low_mean": 0.0016896303386602085, "clip_ratio/low_min": 0.00021568697684415383, "clip_ratio/region_mean": 0.002837770342011936, "epoch": 0.1170397074007315, "grad_norm": 7443535.5, "learning_rate": 1e-06, "loss": 1525166.375, "step": 1254 }, { "clip_ratio/high_max": 0.002634889773617033, "clip_ratio/high_mean": 0.0011017769793397747, "clip_ratio/low_mean": 0.0015114700549929694, "clip_ratio/low_min": 0.00014962339810153935, "clip_ratio/region_mean": 0.0026132470375159755, "epoch": 0.11713304050073207, "grad_norm": 0.16583870351314545, "learning_rate": 1e-06, "loss": 0.0421, "step": 1255 }, { "clip_ratio/high_max": 0.0026016592673840933, "clip_ratio/high_mean": 0.0011186912852281239, "clip_ratio/low_mean": 0.0014728064015798736, "clip_ratio/low_min": 0.0001130934469983913, "clip_ratio/region_mean": 0.0025914976722560823, "epoch": 0.11722637360073267, "grad_norm": 1.7303484678268433, "learning_rate": 1e-06, "loss": -0.0225, "step": 1256 }, { "clip_ratio/high_max": 0.0025716617528814822, "clip_ratio/high_mean": 0.001122258461691672, "clip_ratio/low_mean": 0.001627819801797159, "clip_ratio/low_min": 0.00011881568843818968, "clip_ratio/region_mean": 0.0027500782744027674, "epoch": 0.11731970670073325, "grad_norm": 0.573452353477478, "learning_rate": 1e-06, "loss": 0.0103, "step": 1257 }, { "clip_ratio/high_max": 0.0029006407348788343, "clip_ratio/high_mean": 0.0011830037237814395, "clip_ratio/low_mean": 0.0017434575129300356, "clip_ratio/low_min": 0.00024209160437749233, "clip_ratio/region_mean": 0.002926461245806422, "epoch": 0.11741303980073384, "grad_norm": 18.35322380065918, "learning_rate": 1e-06, "loss": 11.4306, "step": 1258 }, { "clip_ratio/high_max": 0.0023429351203958504, "clip_ratio/high_mean": 0.00099605746618181, "clip_ratio/low_mean": 0.001709912387013901, "clip_ratio/low_min": 0.0001604028616384312, "clip_ratio/region_mean": 0.0027059698186349124, "epoch": 0.11750637290073442, "grad_norm": 0.39945757389068604, "learning_rate": 1e-06, "loss": 0.0445, "step": 1259 }, { "clip_ratio/high_max": 0.0025692359886306804, "clip_ratio/high_mean": 0.001092638362024445, "clip_ratio/low_mean": 0.0017810750105127227, "clip_ratio/low_min": 0.0001803276845748769, "clip_ratio/region_mean": 0.002873713332519401, "epoch": 0.117599706000735, "grad_norm": 21.694705963134766, "learning_rate": 1e-06, "loss": 0.0239, "step": 1260 }, { "clip_ratio/high_max": 0.0027784700359916314, "clip_ratio/high_mean": 0.0011262063544563716, "clip_ratio/low_mean": 0.0016389257361879572, "clip_ratio/low_min": 0.00010603273585729767, "clip_ratio/region_mean": 0.002765132063359488, "epoch": 0.11769303910073559, "grad_norm": 1.8864012956619263, "learning_rate": 1e-06, "loss": 0.0254, "step": 1261 }, { "clip_ratio/high_max": 0.0031021242612041533, "clip_ratio/high_mean": 0.0012276673442102037, "clip_ratio/low_mean": 0.0018145157046092208, "clip_ratio/low_min": 0.0003790237396970042, "clip_ratio/region_mean": 0.0030421830160776153, "epoch": 0.11778637220073616, "grad_norm": 0.18558058142662048, "learning_rate": 1e-06, "loss": 0.0076, "step": 1262 }, { "clip_ratio/high_max": 0.0024200998377637006, "clip_ratio/high_mean": 0.0010604286871966906, "clip_ratio/low_mean": 0.002044226937869098, "clip_ratio/low_min": 0.00015289765360648744, "clip_ratio/region_mean": 0.0031046556250657886, "epoch": 0.11787970530073674, "grad_norm": 1.6986167430877686, "learning_rate": 1e-06, "loss": 0.0399, "step": 1263 }, { "clip_ratio/high_max": 0.002635616561747156, "clip_ratio/high_mean": 0.001049485228577396, "clip_ratio/low_mean": 0.0017352894938085228, "clip_ratio/low_min": 7.518211896240246e-05, "clip_ratio/region_mean": 0.0027847747114719823, "epoch": 0.11797303840073733, "grad_norm": 1077.0032958984375, "learning_rate": 1e-06, "loss": 0.3703, "step": 1264 }, { "clip_ratio/high_max": 0.002858298466890119, "clip_ratio/high_mean": 0.0011186864867340773, "clip_ratio/low_mean": 0.001685362462012563, "clip_ratio/low_min": 6.833515453763539e-05, "clip_ratio/region_mean": 0.0028040489996783435, "epoch": 0.11806637150073791, "grad_norm": 0.440521776676178, "learning_rate": 1e-06, "loss": 0.0339, "step": 1265 }, { "clip_ratio/high_max": 0.0024358613081858493, "clip_ratio/high_mean": 0.0010616764047881588, "clip_ratio/low_mean": 0.0016246877639787272, "clip_ratio/low_min": 0.00015998480739654042, "clip_ratio/region_mean": 0.0026863640887313522, "epoch": 0.1181597046007385, "grad_norm": 8.292718887329102, "learning_rate": 1e-06, "loss": 651214.75, "step": 1266 }, { "clip_ratio/high_max": 0.0028760029235854745, "clip_ratio/high_mean": 0.0012014865751552861, "clip_ratio/low_mean": 0.001623543674213579, "clip_ratio/low_min": 0.00014958317842683755, "clip_ratio/region_mean": 0.002825030154781416, "epoch": 0.11825303770073908, "grad_norm": 141.85693359375, "learning_rate": 1e-06, "loss": 0.0649, "step": 1267 }, { "clip_ratio/high_max": 0.0028612568203243427, "clip_ratio/high_mean": 0.001172155865788227, "clip_ratio/low_mean": 0.0019057675744988956, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.0030779234948568046, "epoch": 0.11834637080073966, "grad_norm": 11331551.0, "learning_rate": 1e-06, "loss": 4685.4932, "step": 1268 }, { "clip_ratio/high_max": 0.002327132177015301, "clip_ratio/high_mean": 0.0009849578018474858, "clip_ratio/low_mean": 0.0016447174239146989, "clip_ratio/low_min": 5.3297328122425824e-05, "clip_ratio/region_mean": 0.0026296751893823966, "epoch": 0.11843970390074025, "grad_norm": 2.2576262950897217, "learning_rate": 1e-06, "loss": 0.0551, "step": 1269 }, { "clip_ratio/high_max": 0.0025863627743092366, "clip_ratio/high_mean": 0.0010621324290696066, "clip_ratio/low_mean": 0.0013763710121565964, "clip_ratio/low_min": 0.00017661631864029914, "clip_ratio/region_mean": 0.0024385033975704573, "epoch": 0.11853303700074083, "grad_norm": 64226.171875, "learning_rate": 1e-06, "loss": 112.0592, "step": 1270 }, { "clip_ratio/high_max": 0.002343168311199406, "clip_ratio/high_mean": 0.0010886430536629632, "clip_ratio/low_mean": 0.0014820904871157836, "clip_ratio/low_min": 0.00011277654084551614, "clip_ratio/region_mean": 0.0025707335662445985, "epoch": 0.11862637010074141, "grad_norm": 1.34003484249115, "learning_rate": 1e-06, "loss": 0.0434, "step": 1271 }, { "clip_ratio/high_max": 0.00225474782200763, "clip_ratio/high_mean": 0.0009727455253596418, "clip_ratio/low_mean": 0.0013826721915393136, "clip_ratio/low_min": 0.00010036127059720457, "clip_ratio/region_mean": 0.0023554176223115064, "epoch": 0.118719703200742, "grad_norm": 178811.484375, "learning_rate": 1e-06, "loss": 5.4301, "step": 1272 }, { "clip_ratio/high_max": 0.002521527374483412, "clip_ratio/high_mean": 0.0010823165612237062, "clip_ratio/low_mean": 0.0014219719378161244, "clip_ratio/low_min": 2.7352298275218345e-05, "clip_ratio/region_mean": 0.0025042884662980214, "epoch": 0.11881303630074258, "grad_norm": 128.9577178955078, "learning_rate": 1e-06, "loss": 0.0298, "step": 1273 }, { "clip_ratio/high_max": 0.0029279654045240022, "clip_ratio/high_mean": 0.0010948126782750478, "clip_ratio/low_mean": 0.001756403136823792, "clip_ratio/low_min": 0.00011030779751308728, "clip_ratio/region_mean": 0.002851215780538041, "epoch": 0.11890636940074316, "grad_norm": 1.7387869358062744, "learning_rate": 1e-06, "loss": 0.0222, "step": 1274 }, { "clip_ratio/high_max": 0.0022350958242896013, "clip_ratio/high_mean": 0.0009996668995881919, "clip_ratio/low_mean": 0.0016625109456072096, "clip_ratio/low_min": 0.00015270732183125801, "clip_ratio/region_mean": 0.0026621778670232743, "epoch": 0.11899970250074375, "grad_norm": 2761.40869140625, "learning_rate": 1e-06, "loss": 0.3884, "step": 1275 }, { "clip_ratio/high_max": 0.002866925540729426, "clip_ratio/high_mean": 0.0011721632508852053, "clip_ratio/low_mean": 0.0016402996188844554, "clip_ratio/low_min": 0.00029226318747532787, "clip_ratio/region_mean": 0.0028124628734076396, "epoch": 0.11909303560074433, "grad_norm": 3.6268842220306396, "learning_rate": 1e-06, "loss": 0.0278, "step": 1276 }, { "clip_ratio/high_max": 0.0023356233214144595, "clip_ratio/high_mean": 0.0009571378050168278, "clip_ratio/low_mean": 0.001458370434193057, "clip_ratio/low_min": 0.0002807661167025799, "clip_ratio/region_mean": 0.0024155082282959484, "epoch": 0.11918636870074492, "grad_norm": 0.37500566244125366, "learning_rate": 1e-06, "loss": 0.0842, "step": 1277 }, { "clip_ratio/high_max": 0.002454078981827479, "clip_ratio/high_mean": 0.0010560654700384475, "clip_ratio/low_mean": 0.0014935763902030885, "clip_ratio/low_min": 1.5221627108985558e-05, "clip_ratio/region_mean": 0.002549641882069409, "epoch": 0.1192797018007455, "grad_norm": 304439.25, "learning_rate": 1e-06, "loss": 56.9431, "step": 1278 }, { "clip_ratio/high_max": 0.002539203007472679, "clip_ratio/high_mean": 0.0010198548407061026, "clip_ratio/low_mean": 0.0017058754183381097, "clip_ratio/low_min": 0.000172702903910249, "clip_ratio/region_mean": 0.002725730257225223, "epoch": 0.11937303490074608, "grad_norm": 107.1011734008789, "learning_rate": 1e-06, "loss": 0.0706, "step": 1279 }, { "clip_ratio/high_max": 0.0025215589703293517, "clip_ratio/high_mean": 0.0010095993002323667, "clip_ratio/low_mean": 0.0015824723523110151, "clip_ratio/low_min": 0.00017615348770050332, "clip_ratio/region_mean": 0.0025920716434484348, "epoch": 0.11946636800074667, "grad_norm": 0.12581756711006165, "learning_rate": 1e-06, "loss": 0.0425, "step": 1280 }, { "clip_ratio/high_max": 0.002762900992820505, "clip_ratio/high_mean": 0.0010711492595874006, "clip_ratio/low_mean": 0.0016012663145374972, "clip_ratio/low_min": 0.0001288427511099144, "clip_ratio/region_mean": 0.0026724155468400568, "epoch": 0.11955970110074725, "grad_norm": 18.771554946899414, "learning_rate": 1e-06, "loss": -0.0004, "step": 1281 }, { "clip_ratio/high_max": 0.002937273013230879, "clip_ratio/high_mean": 0.0011763448819692712, "clip_ratio/low_mean": 0.0018052066516247578, "clip_ratio/low_min": 0.0002281651341036195, "clip_ratio/region_mean": 0.0029815515445079654, "epoch": 0.11965303420074783, "grad_norm": 128.1190643310547, "learning_rate": 1e-06, "loss": 0.0787, "step": 1282 }, { "clip_ratio/high_max": 0.002379294433922041, "clip_ratio/high_mean": 0.0009520577841612976, "clip_ratio/low_mean": 0.001575715337821748, "clip_ratio/low_min": 0.00022441083547164453, "clip_ratio/region_mean": 0.0025277731401729397, "epoch": 0.11974636730074842, "grad_norm": 0.10903871804475784, "learning_rate": 1e-06, "loss": 0.0041, "step": 1283 }, { "clip_ratio/high_max": 0.0026697091016103514, "clip_ratio/high_mean": 0.0010851600127352867, "clip_ratio/low_mean": 0.0018996425860677846, "clip_ratio/low_min": 0.00024376705459872028, "clip_ratio/region_mean": 0.0029848026169929653, "epoch": 0.119839700400749, "grad_norm": 2.140317678451538, "learning_rate": 1e-06, "loss": 0.0407, "step": 1284 }, { "clip_ratio/high_max": 0.002785126052913256, "clip_ratio/high_mean": 0.0010643614987202454, "clip_ratio/low_mean": 0.001758630864060251, "clip_ratio/low_min": 0.00014472265411313856, "clip_ratio/region_mean": 0.0028229924137121998, "epoch": 0.11993303350074958, "grad_norm": 5.583578109741211, "learning_rate": 1e-06, "loss": 0.0383, "step": 1285 }, { "clip_ratio/high_max": 0.0026204491587122902, "clip_ratio/high_mean": 0.0010562085881247185, "clip_ratio/low_mean": 0.0019424147867539432, "clip_ratio/low_min": 0.00012514213085523807, "clip_ratio/region_mean": 0.0029986234148964286, "epoch": 0.12002636660075017, "grad_norm": 1.4959341287612915, "learning_rate": 1e-06, "loss": 0.0041, "step": 1286 }, { "clip_ratio/high_max": 0.002324942543054931, "clip_ratio/high_mean": 0.0010305876148777315, "clip_ratio/low_mean": 0.0017553214056533761, "clip_ratio/low_min": 0.00016375875475205248, "clip_ratio/region_mean": 0.0027859090478159487, "epoch": 0.12011969970075075, "grad_norm": 0.22705040872097015, "learning_rate": 1e-06, "loss": 0.0171, "step": 1287 }, { "clip_ratio/high_max": 0.002367006483837031, "clip_ratio/high_mean": 0.001063610314304242, "clip_ratio/low_mean": 0.0018711815173446666, "clip_ratio/low_min": 0.0002173015463995398, "clip_ratio/region_mean": 0.0029347918680286966, "epoch": 0.12021303280075134, "grad_norm": 0.9103193879127502, "learning_rate": 1e-06, "loss": 0.0434, "step": 1288 }, { "clip_ratio/high_max": 0.00246326452906942, "clip_ratio/high_mean": 0.0010631302138790488, "clip_ratio/low_mean": 0.001633400246646488, "clip_ratio/low_min": 0.00020178403610771056, "clip_ratio/region_mean": 0.002696530405955855, "epoch": 0.12030636590075192, "grad_norm": 0.8848915696144104, "learning_rate": 1e-06, "loss": -0.0293, "step": 1289 }, { "clip_ratio/high_max": 0.0026815372621058486, "clip_ratio/high_mean": 0.001094168681447627, "clip_ratio/low_mean": 0.0018804785649990663, "clip_ratio/low_min": 0.00010425290201965254, "clip_ratio/region_mean": 0.0029746472573606297, "epoch": 0.1203996990007525, "grad_norm": 0.15472941100597382, "learning_rate": 1e-06, "loss": 0.0715, "step": 1290 }, { "clip_ratio/high_max": 0.0026537783196545206, "clip_ratio/high_mean": 0.0011002218852809165, "clip_ratio/low_mean": 0.0024940771181718446, "clip_ratio/low_min": 0.00010717975965235382, "clip_ratio/region_mean": 0.0035942989343311638, "epoch": 0.12049303210075309, "grad_norm": 415.4970703125, "learning_rate": 1e-06, "loss": 0.1435, "step": 1291 }, { "clip_ratio/high_max": 0.0026602099351293873, "clip_ratio/high_mean": 0.0012724521111522336, "clip_ratio/low_mean": 0.0017916627184604295, "clip_ratio/low_min": 8.241842442657799e-05, "clip_ratio/region_mean": 0.003064114833250642, "epoch": 0.12058636520075366, "grad_norm": 95.32950592041016, "learning_rate": 1e-06, "loss": -0.0074, "step": 1292 }, { "clip_ratio/high_max": 0.002446442384098191, "clip_ratio/high_mean": 0.0009719012559799012, "clip_ratio/low_mean": 0.0016809420412755571, "clip_ratio/low_min": 0.00013997410769661656, "clip_ratio/region_mean": 0.002652843242685776, "epoch": 0.12067969830075424, "grad_norm": 7.08378791809082, "learning_rate": 1e-06, "loss": 0.0214, "step": 1293 }, { "clip_ratio/high_max": 0.002776145665848162, "clip_ratio/high_mean": 0.0010755179882835364, "clip_ratio/low_mean": 0.0017547391180414706, "clip_ratio/low_min": 0.0003150701813865453, "clip_ratio/region_mean": 0.0028302570717642084, "epoch": 0.12077303140075484, "grad_norm": 0.1413639783859253, "learning_rate": 1e-06, "loss": 0.0467, "step": 1294 }, { "clip_ratio/high_max": 0.002853964499081485, "clip_ratio/high_mean": 0.0011674125089484733, "clip_ratio/low_mean": 0.001764378586813109, "clip_ratio/low_min": 8.37441530165961e-05, "clip_ratio/region_mean": 0.002931790964794345, "epoch": 0.12086636450075541, "grad_norm": 0.28277555108070374, "learning_rate": 1e-06, "loss": -0.0018, "step": 1295 }, { "clip_ratio/high_max": 0.002345942164538428, "clip_ratio/high_mean": 0.0010516047950659413, "clip_ratio/low_mean": 0.0021422000136226416, "clip_ratio/low_min": 0.0002182956177421147, "clip_ratio/region_mean": 0.003193804863258265, "epoch": 0.12095969760075599, "grad_norm": 16.739648818969727, "learning_rate": 1e-06, "loss": 0.0218, "step": 1296 }, { "clip_ratio/high_max": 0.0024812604096950963, "clip_ratio/high_mean": 0.0009686353223514743, "clip_ratio/low_mean": 0.0019338587480888236, "clip_ratio/low_min": 0.00020287244660721626, "clip_ratio/region_mean": 0.0029024940376984887, "epoch": 0.12105303070075658, "grad_norm": 0.2815110683441162, "learning_rate": 1e-06, "loss": 0.0297, "step": 1297 }, { "clip_ratio/high_max": 0.0027093083954241592, "clip_ratio/high_mean": 0.0010830601149791619, "clip_ratio/low_mean": 0.002142704419384245, "clip_ratio/low_min": 0.00027408121968619525, "clip_ratio/region_mean": 0.003225764521630481, "epoch": 0.12114636380075716, "grad_norm": 0.13505704700946808, "learning_rate": 1e-06, "loss": 0.0815, "step": 1298 }, { "clip_ratio/high_max": 0.0026842443767236546, "clip_ratio/high_mean": 0.0011088426454080036, "clip_ratio/low_mean": 0.0017748572608979885, "clip_ratio/low_min": 0.00022856307896290673, "clip_ratio/region_mean": 0.0028836998535552993, "epoch": 0.12123969690075775, "grad_norm": 4.790085315704346, "learning_rate": 1e-06, "loss": 0.0451, "step": 1299 }, { "clip_ratio/high_max": 0.002646590342919808, "clip_ratio/high_mean": 0.0011177973210578784, "clip_ratio/low_mean": 0.0017304298853559885, "clip_ratio/low_min": 0.00023532180148322368, "clip_ratio/region_mean": 0.0028482271591201425, "epoch": 0.12133303000075833, "grad_norm": 0.6362149119377136, "learning_rate": 1e-06, "loss": -0.0152, "step": 1300 }, { "clip_ratio/high_max": 0.0023714734343229793, "clip_ratio/high_mean": 0.0009635728165449109, "clip_ratio/low_mean": 0.001666305663093226, "clip_ratio/low_min": 0.00010683457367122173, "clip_ratio/region_mean": 0.002629878494190052, "epoch": 0.12142636310075891, "grad_norm": 0.12182816863059998, "learning_rate": 1e-06, "loss": 0.0452, "step": 1301 }, { "clip_ratio/high_max": 0.002856718514522072, "clip_ratio/high_mean": 0.0011808672061306424, "clip_ratio/low_mean": 0.0017053154806490056, "clip_ratio/low_min": 6.371911149471998e-05, "clip_ratio/region_mean": 0.002886182708607521, "epoch": 0.1215196962007595, "grad_norm": 0.33548665046691895, "learning_rate": 1e-06, "loss": -0.0372, "step": 1302 }, { "clip_ratio/high_max": 0.0026209691641270183, "clip_ratio/high_mean": 0.0010677490136004053, "clip_ratio/low_mean": 0.0015185351694526616, "clip_ratio/low_min": 0.0001495559681643499, "clip_ratio/region_mean": 0.002586284179415088, "epoch": 0.12161302930076008, "grad_norm": 0.1582452356815338, "learning_rate": 1e-06, "loss": -0.01, "step": 1303 }, { "clip_ratio/high_max": 0.0024981526839837898, "clip_ratio/high_mean": 0.0009750441913638497, "clip_ratio/low_mean": 0.0020848522035521455, "clip_ratio/low_min": 0.00011993395946774399, "clip_ratio/region_mean": 0.0030598964003729634, "epoch": 0.12170636240076066, "grad_norm": 1.7965190410614014, "learning_rate": 1e-06, "loss": 0.0594, "step": 1304 }, { "clip_ratio/high_max": 0.0027816733418148942, "clip_ratio/high_mean": 0.0011246330959693296, "clip_ratio/low_mean": 0.0017055830903700553, "clip_ratio/low_min": 3.423761427256977e-05, "clip_ratio/region_mean": 0.0028302161299507134, "epoch": 0.12179969550076125, "grad_norm": 0.5362770557403564, "learning_rate": 1e-06, "loss": -0.0031, "step": 1305 }, { "clip_ratio/high_max": 0.002099929864925798, "clip_ratio/high_mean": 0.0009149324614554644, "clip_ratio/low_mean": 0.001991286058910191, "clip_ratio/low_min": 0.00020884660898445873, "clip_ratio/region_mean": 0.0029062184621579945, "epoch": 0.12189302860076183, "grad_norm": 28.99228286743164, "learning_rate": 1e-06, "loss": 0.0744, "step": 1306 }, { "clip_ratio/high_max": 0.002539543049351778, "clip_ratio/high_mean": 0.0012048418466292787, "clip_ratio/low_mean": 0.0018764610176731367, "clip_ratio/low_min": 7.491630458389409e-05, "clip_ratio/region_mean": 0.0030813028206466697, "epoch": 0.12198636170076241, "grad_norm": 777.0072631835938, "learning_rate": 1e-06, "loss": 0.2584, "step": 1307 }, { "clip_ratio/high_max": 0.0025953483855118975, "clip_ratio/high_mean": 0.0011640389275271446, "clip_ratio/low_mean": 0.0018990503376699053, "clip_ratio/low_min": 0.00024639467210363364, "clip_ratio/region_mean": 0.003063089257921092, "epoch": 0.122079694800763, "grad_norm": 128.30288696289062, "learning_rate": 1e-06, "loss": 0.039, "step": 1308 }, { "clip_ratio/high_max": 0.0026090781029779464, "clip_ratio/high_mean": 0.0011438887704571243, "clip_ratio/low_mean": 0.0017591812647879124, "clip_ratio/low_min": 0.00018622421339387074, "clip_ratio/region_mean": 0.002903070067986846, "epoch": 0.12217302790076358, "grad_norm": 0.7800815105438232, "learning_rate": 1e-06, "loss": 0.0146, "step": 1309 }, { "clip_ratio/high_max": 0.0022846799402032048, "clip_ratio/high_mean": 0.0009694874515844276, "clip_ratio/low_mean": 0.001755118450091686, "clip_ratio/low_min": 0.00014912714323145337, "clip_ratio/region_mean": 0.002724605830735527, "epoch": 0.12226636100076417, "grad_norm": 938.2282104492188, "learning_rate": 1e-06, "loss": 1.3953, "step": 1310 }, { "clip_ratio/high_max": 0.0026915706548606977, "clip_ratio/high_mean": 0.001045897719450295, "clip_ratio/low_mean": 0.0016949632590694819, "clip_ratio/low_min": 0.00020850138571404386, "clip_ratio/region_mean": 0.002740861033089459, "epoch": 0.12235969410076475, "grad_norm": 2.192115306854248, "learning_rate": 1e-06, "loss": 280.855, "step": 1311 }, { "clip_ratio/high_max": 0.0027553722902666777, "clip_ratio/high_mean": 0.0010573356157692615, "clip_ratio/low_mean": 0.0019869756215484813, "clip_ratio/low_min": 0.00013522309109248454, "clip_ratio/region_mean": 0.0030443112191278487, "epoch": 0.12245302720076533, "grad_norm": 417156.46875, "learning_rate": 1e-06, "loss": 25.836, "step": 1312 }, { "clip_ratio/high_max": 0.0028961833304492757, "clip_ratio/high_mean": 0.0012140483468101593, "clip_ratio/low_mean": 0.0013652213019668125, "clip_ratio/low_min": 8.400347451242851e-05, "clip_ratio/region_mean": 0.0025792696396820247, "completions/clipped_ratio": 0.01202392578125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 603.0662231445312, "completions/mean_terminated_length": 560.5563354492188, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.12254636030076592, "grad_norm": 10544022.0, "learning_rate": 1e-06, "loss": 125965.2969, "num_tokens": 974084786.0, "reward": 0.5883091688156128, "reward_std": 0.1882302314043045, "rewards/simpleverify_reward/mean": 0.5883091688156128, "rewards/simpleverify_reward/std": 0.4921416938304901, "step": 1313 }, { "clip_ratio/high_max": 0.003347525271237828, "clip_ratio/high_mean": 0.0013542946180677973, "clip_ratio/low_mean": 0.001061790657331585, "clip_ratio/low_min": 0.00010008516028392478, "clip_ratio/region_mean": 0.0024160852408385836, "epoch": 0.1226396934007665, "grad_norm": 25046222.0, "learning_rate": 1e-06, "loss": 9189.9004, "step": 1314 }, { "clip_ratio/high_max": 0.0028960613562958315, "clip_ratio/high_mean": 0.0011054927999794018, "clip_ratio/low_mean": 0.0010594565028441139, "clip_ratio/low_min": 0.00013437664892990142, "clip_ratio/region_mean": 0.0021649492846336216, "epoch": 0.12273302650076708, "grad_norm": 6170739.5, "learning_rate": 1e-06, "loss": 10719.4023, "step": 1315 }, { "clip_ratio/high_max": 0.0029601669011753984, "clip_ratio/high_mean": 0.0011510662370710634, "clip_ratio/low_mean": 0.001264434053155128, "clip_ratio/low_min": 0.0001222938244609395, "clip_ratio/region_mean": 0.0024155002683983184, "epoch": 0.12282635960076767, "grad_norm": 14649090048.0, "learning_rate": 1e-06, "loss": 9281067008.0, "step": 1316 }, { "clip_ratio/high_max": 0.002600384053948801, "clip_ratio/high_mean": 0.001070719947165344, "clip_ratio/low_mean": 0.0009981944394894526, "clip_ratio/low_min": 4.0017260289459955e-05, "clip_ratio/region_mean": 0.0020689143748313654, "epoch": 0.12291969270076825, "grad_norm": 63825.01953125, "learning_rate": 1e-06, "loss": 734.1898, "step": 1317 }, { "clip_ratio/high_max": 0.0034723718563327566, "clip_ratio/high_mean": 0.0014510143337247428, "clip_ratio/low_mean": 0.0011123814965685597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002563395813922398, "epoch": 0.12301302580076884, "grad_norm": 497526.0625, "learning_rate": 1e-06, "loss": 413.6533, "step": 1318 }, { "clip_ratio/high_max": 0.0027586057476582937, "clip_ratio/high_mean": 0.0011281032893748488, "clip_ratio/low_mean": 0.0011721740957000293, "clip_ratio/low_min": 0.00010625063077895902, "clip_ratio/region_mean": 0.002300277403264772, "epoch": 0.12310635890076942, "grad_norm": 3490.934326171875, "learning_rate": 1e-06, "loss": 45.634, "step": 1319 }, { "clip_ratio/high_max": 0.0025460552678850945, "clip_ratio/high_mean": 0.0011641192322713323, "clip_ratio/low_mean": 0.0011363854828232434, "clip_ratio/low_min": 0.00018440070653014118, "clip_ratio/region_mean": 0.0023005047551123425, "epoch": 0.12319969200077, "grad_norm": 1035214.75, "learning_rate": 1e-06, "loss": 817.832, "step": 1320 }, { "clip_ratio/high_max": 0.0028774368402082473, "clip_ratio/high_mean": 0.001188034524602699, "clip_ratio/low_mean": 0.000981177970970748, "clip_ratio/low_min": 7.708375051151961e-05, "clip_ratio/region_mean": 0.002169212471926585, "epoch": 0.12329302510077059, "grad_norm": 193839.96875, "learning_rate": 1e-06, "loss": 22.8366, "step": 1321 }, { "clip_ratio/high_max": 0.0027686473258654587, "clip_ratio/high_mean": 0.0012172394017397892, "clip_ratio/low_mean": 0.0011842159510706551, "clip_ratio/low_min": 0.0001557590376251028, "clip_ratio/region_mean": 0.002401455400104169, "epoch": 0.12338635820077117, "grad_norm": 8212786.5, "learning_rate": 1e-06, "loss": 1091.8442, "step": 1322 }, { "clip_ratio/high_max": 0.003094690735451877, "clip_ratio/high_mean": 0.0012448113811842632, "clip_ratio/low_mean": 0.0011990642506134463, "clip_ratio/low_min": 8.930230251280591e-05, "clip_ratio/region_mean": 0.0024438756081508473, "epoch": 0.12347969130077174, "grad_norm": 1.9770727157592773, "learning_rate": 1e-06, "loss": -0.0075, "step": 1323 }, { "clip_ratio/high_max": 0.0028810346266254783, "clip_ratio/high_mean": 0.00122314972395543, "clip_ratio/low_mean": 0.0010687179819797166, "clip_ratio/low_min": 4.253152383171255e-05, "clip_ratio/region_mean": 0.0022918677641428076, "epoch": 0.12357302440077234, "grad_norm": 1905365.875, "learning_rate": 1e-06, "loss": 6765263.0, "step": 1324 }, { "clip_ratio/high_max": 0.003121137386187911, "clip_ratio/high_mean": 0.0012522940851340536, "clip_ratio/low_mean": 0.0012267583169887075, "clip_ratio/low_min": 5.816938391944859e-05, "clip_ratio/region_mean": 0.002479052411217708, "epoch": 0.12366635750077291, "grad_norm": 3167.805419921875, "learning_rate": 1e-06, "loss": 1.5535, "step": 1325 }, { "clip_ratio/high_max": 0.0027913033991353586, "clip_ratio/high_mean": 0.0012098609731765464, "clip_ratio/low_mean": 0.0011652859393507242, "clip_ratio/low_min": 8.597171927249292e-05, "clip_ratio/region_mean": 0.0023751469125272706, "epoch": 0.12375969060077349, "grad_norm": 34311772.0, "learning_rate": 1e-06, "loss": 24000.7148, "step": 1326 }, { "clip_ratio/high_max": 0.0027516176196513698, "clip_ratio/high_mean": 0.0011580460231925827, "clip_ratio/low_mean": 0.0014469725465460215, "clip_ratio/low_min": 4.25977777922526e-05, "clip_ratio/region_mean": 0.0026050185988424346, "epoch": 0.12385302370077408, "grad_norm": 243.29627990722656, "learning_rate": 1e-06, "loss": 4.6453, "step": 1327 }, { "clip_ratio/high_max": 0.0029805044614477083, "clip_ratio/high_mean": 0.0012430451224645367, "clip_ratio/low_mean": 0.001253142243513139, "clip_ratio/low_min": 0.00011267840636719484, "clip_ratio/region_mean": 0.002496187349606771, "epoch": 0.12394635680077466, "grad_norm": 8.301875114440918, "learning_rate": 1e-06, "loss": 0.0421, "step": 1328 }, { "clip_ratio/high_max": 0.0026758160929603036, "clip_ratio/high_mean": 0.001268159056053264, "clip_ratio/low_mean": 0.0014937930136511568, "clip_ratio/low_min": 0.00016062554823292885, "clip_ratio/region_mean": 0.00276195201149676, "epoch": 0.12403968990077525, "grad_norm": 3.611328363418579, "learning_rate": 1e-06, "loss": 11.8485, "step": 1329 }, { "clip_ratio/high_max": 0.0033672382924123667, "clip_ratio/high_mean": 0.0013653243677254068, "clip_ratio/low_mean": 0.0014212747082638089, "clip_ratio/low_min": 0.00017754750115273055, "clip_ratio/region_mean": 0.0027865991069120355, "epoch": 0.12413302300077583, "grad_norm": 4.12019681930542, "learning_rate": 1e-06, "loss": 0.0248, "step": 1330 }, { "clip_ratio/high_max": 0.002949690526293125, "clip_ratio/high_mean": 0.0012496342078520684, "clip_ratio/low_mean": 0.0014978455474192742, "clip_ratio/low_min": 0.00018984960479428992, "clip_ratio/region_mean": 0.002747479775280226, "epoch": 0.12422635610077641, "grad_norm": 0.23491708934307098, "learning_rate": 1e-06, "loss": 0.0635, "step": 1331 }, { "clip_ratio/high_max": 0.0023644631219212897, "clip_ratio/high_mean": 0.0009763116413523676, "clip_ratio/low_mean": 0.0016999932850012556, "clip_ratio/low_min": 0.00022220371283765417, "clip_ratio/region_mean": 0.002676304917258676, "epoch": 0.124319689200777, "grad_norm": 0.35941219329833984, "learning_rate": 1e-06, "loss": 0.0689, "step": 1332 }, { "clip_ratio/high_max": 0.002707583269511815, "clip_ratio/high_mean": 0.0011329956032568589, "clip_ratio/low_mean": 0.0013309834721439984, "clip_ratio/low_min": 0.0001467885376769118, "clip_ratio/region_mean": 0.0024639790353830904, "epoch": 0.12441302230077758, "grad_norm": 103.4138412475586, "learning_rate": 1e-06, "loss": 29.9006, "step": 1333 }, { "clip_ratio/high_max": 0.0023598151601618156, "clip_ratio/high_mean": 0.000986379265668802, "clip_ratio/low_mean": 0.001635982273000991, "clip_ratio/low_min": 0.00016116808092192514, "clip_ratio/region_mean": 0.002622361498652026, "epoch": 0.12450635540077816, "grad_norm": 3.2083046436309814, "learning_rate": 1e-06, "loss": 0.0677, "step": 1334 }, { "clip_ratio/high_max": 0.0029471293091773987, "clip_ratio/high_mean": 0.0012238292110851035, "clip_ratio/low_mean": 0.0014725156834174413, "clip_ratio/low_min": 6.115491578384535e-05, "clip_ratio/region_mean": 0.0026963449345203117, "epoch": 0.12459968850077875, "grad_norm": 0.14162856340408325, "learning_rate": 1e-06, "loss": 0.0158, "step": 1335 }, { "clip_ratio/high_max": 0.0031741782659082673, "clip_ratio/high_mean": 0.0013061026511422824, "clip_ratio/low_mean": 0.0013866653407603735, "clip_ratio/low_min": 6.309938271442661e-05, "clip_ratio/region_mean": 0.0026927679864456877, "epoch": 0.12469302160077933, "grad_norm": 844.8184204101562, "learning_rate": 1e-06, "loss": 0.1569, "step": 1336 }, { "clip_ratio/high_max": 0.0032562197629886214, "clip_ratio/high_mean": 0.001402841616254591, "clip_ratio/low_mean": 0.0015069984474394005, "clip_ratio/low_min": 0.0001333006966888206, "clip_ratio/region_mean": 0.0029098399900249206, "epoch": 0.12478635470077991, "grad_norm": 22.62038230895996, "learning_rate": 1e-06, "loss": -0.0065, "step": 1337 }, { "clip_ratio/high_max": 0.0030180084140738472, "clip_ratio/high_mean": 0.0012904098330182023, "clip_ratio/low_mean": 0.0016129396208270919, "clip_ratio/low_min": 0.00016985756701615173, "clip_ratio/region_mean": 0.002903349348343909, "epoch": 0.1248796878007805, "grad_norm": 5185.359375, "learning_rate": 1e-06, "loss": 1675588992.0, "step": 1338 }, { "clip_ratio/high_max": 0.0028880392128485255, "clip_ratio/high_mean": 0.0011796596663771197, "clip_ratio/low_mean": 0.0014709192364534829, "clip_ratio/low_min": 5.447237526823301e-05, "clip_ratio/region_mean": 0.0026505789428483695, "epoch": 0.12497302090078108, "grad_norm": 65.60166931152344, "learning_rate": 1e-06, "loss": 5.7471, "step": 1339 }, { "clip_ratio/high_max": 0.0026705306008807383, "clip_ratio/high_mean": 0.0011774634149332996, "clip_ratio/low_mean": 0.0017858708088169806, "clip_ratio/low_min": 0.0002605140298328479, "clip_ratio/region_mean": 0.002963334249216132, "epoch": 0.12506635400078167, "grad_norm": 3.326878786087036, "learning_rate": 1e-06, "loss": 0.054, "step": 1340 }, { "clip_ratio/high_max": 0.0029294352425495163, "clip_ratio/high_mean": 0.0012471622430894058, "clip_ratio/low_mean": 0.0015624780280631967, "clip_ratio/low_min": 6.610868786083302e-05, "clip_ratio/region_mean": 0.0028096401947550476, "epoch": 0.12515968710078224, "grad_norm": 13365.33203125, "learning_rate": 1e-06, "loss": 6.1477, "step": 1341 }, { "clip_ratio/high_max": 0.003351216197188478, "clip_ratio/high_mean": 0.0012298054643906653, "clip_ratio/low_mean": 0.0014938700951461215, "clip_ratio/low_min": 0.00016687980860297102, "clip_ratio/region_mean": 0.0027236755413468927, "epoch": 0.12525302020078283, "grad_norm": 0.2032417356967926, "learning_rate": 1e-06, "loss": -0.0129, "step": 1342 }, { "clip_ratio/high_max": 0.0027873489198100287, "clip_ratio/high_mean": 0.0011186481187905883, "clip_ratio/low_mean": 0.0015980834050424164, "clip_ratio/low_min": 0.00018384671420790255, "clip_ratio/region_mean": 0.0027167315274709836, "epoch": 0.12534635330078342, "grad_norm": 380.8132629394531, "learning_rate": 1e-06, "loss": 0.2398, "step": 1343 }, { "clip_ratio/high_max": 0.0030305194668471813, "clip_ratio/high_mean": 0.0011946755548706278, "clip_ratio/low_mean": 0.0016799342556623742, "clip_ratio/low_min": 0.00010082079825224355, "clip_ratio/region_mean": 0.002874609868740663, "epoch": 0.125439686400784, "grad_norm": 0.1865394413471222, "learning_rate": 1e-06, "loss": 0.0097, "step": 1344 }, { "clip_ratio/high_max": 0.0026593375005177222, "clip_ratio/high_mean": 0.0010928886295005213, "clip_ratio/low_mean": 0.001904479571749107, "clip_ratio/low_min": 0.00016771562513895333, "clip_ratio/region_mean": 0.0029973682467243634, "epoch": 0.12553301950078458, "grad_norm": 34960152.0, "learning_rate": 1e-06, "loss": 30046.6738, "step": 1345 }, { "clip_ratio/high_max": 0.002593464269011747, "clip_ratio/high_mean": 0.0010865710210055113, "clip_ratio/low_mean": 0.0015823002831893973, "clip_ratio/low_min": 0.00013098886483930983, "clip_ratio/region_mean": 0.002668871311470866, "epoch": 0.12562635260078517, "grad_norm": 3.67510724067688, "learning_rate": 1e-06, "loss": 0.0012, "step": 1346 }, { "clip_ratio/high_max": 0.0031570967730658595, "clip_ratio/high_mean": 0.0013122608652338386, "clip_ratio/low_mean": 0.0015063734263094375, "clip_ratio/low_min": 0.00018411947712593246, "clip_ratio/region_mean": 0.0028186341951368377, "epoch": 0.12571968570078576, "grad_norm": 0.46530383825302124, "learning_rate": 1e-06, "loss": 0.2359, "step": 1347 }, { "clip_ratio/high_max": 0.0029708821093663573, "clip_ratio/high_mean": 0.0012352302328508813, "clip_ratio/low_mean": 0.0018282363816979341, "clip_ratio/low_min": 0.00019331154726387467, "clip_ratio/region_mean": 0.0030634665890829638, "epoch": 0.12581301880078632, "grad_norm": 0.15034519135951996, "learning_rate": 1e-06, "loss": 0.0045, "step": 1348 }, { "clip_ratio/high_max": 0.002672720187547384, "clip_ratio/high_mean": 0.0011451203645265196, "clip_ratio/low_mean": 0.0016769073372415733, "clip_ratio/low_min": 0.00014200955592968967, "clip_ratio/region_mean": 0.002822027738147881, "epoch": 0.12590635190078692, "grad_norm": 103.00567626953125, "learning_rate": 1e-06, "loss": 0.0801, "step": 1349 }, { "clip_ratio/high_max": 0.0025779462011996657, "clip_ratio/high_mean": 0.0010173477839998668, "clip_ratio/low_mean": 0.001681696350715356, "clip_ratio/low_min": 4.938759229844436e-05, "clip_ratio/region_mean": 0.0026990441474481486, "epoch": 0.1259996850007875, "grad_norm": 0.6358668208122253, "learning_rate": 1e-06, "loss": 0.0498, "step": 1350 }, { "clip_ratio/high_max": 0.0024100137961795554, "clip_ratio/high_mean": 0.0010299052664777264, "clip_ratio/low_mean": 0.0017007108872348908, "clip_ratio/low_min": 0.00010452467540744692, "clip_ratio/region_mean": 0.002730616135522723, "epoch": 0.12609301810078807, "grad_norm": 22.69719696044922, "learning_rate": 1e-06, "loss": 0.0529, "step": 1351 }, { "clip_ratio/high_max": 0.002751274143520277, "clip_ratio/high_mean": 0.0011349360465828795, "clip_ratio/low_mean": 0.0014919730347173754, "clip_ratio/low_min": 0.00028489053784142016, "clip_ratio/region_mean": 0.0026269091249560006, "epoch": 0.12618635120078867, "grad_norm": 0.8071578145027161, "learning_rate": 1e-06, "loss": 4.4608, "step": 1352 }, { "clip_ratio/high_max": 0.0025065393492695875, "clip_ratio/high_mean": 0.0010474944592715474, "clip_ratio/low_mean": 0.0017422355776943732, "clip_ratio/low_min": 0.00019379730474611279, "clip_ratio/region_mean": 0.0027897300897166133, "epoch": 0.12627968430078926, "grad_norm": 446.40484619140625, "learning_rate": 1e-06, "loss": 0.1581, "step": 1353 }, { "clip_ratio/high_max": 0.0023192357512016315, "clip_ratio/high_mean": 0.0009305693420174066, "clip_ratio/low_mean": 0.0020006406120955944, "clip_ratio/low_min": 5.584729342444916e-05, "clip_ratio/region_mean": 0.0029312099868548103, "epoch": 0.12637301740078982, "grad_norm": 12441.0625, "learning_rate": 1e-06, "loss": 97554.3516, "step": 1354 }, { "clip_ratio/high_max": 0.0025742259531398304, "clip_ratio/high_mean": 0.0010223299905192107, "clip_ratio/low_mean": 0.0019067253306275234, "clip_ratio/low_min": 0.00015493260434595868, "clip_ratio/region_mean": 0.0029290552920429036, "epoch": 0.12646635050079041, "grad_norm": 16.43515968322754, "learning_rate": 1e-06, "loss": 0.0481, "step": 1355 }, { "clip_ratio/high_max": 0.0022704468137817457, "clip_ratio/high_mean": 0.0010347225743316812, "clip_ratio/low_mean": 0.0017949999382835813, "clip_ratio/low_min": 0.00015929684832372004, "clip_ratio/region_mean": 0.0028297225580899976, "epoch": 0.126559683600791, "grad_norm": 346832.75, "learning_rate": 1e-06, "loss": 59.9786, "step": 1356 }, { "clip_ratio/high_max": 0.002804802410537377, "clip_ratio/high_mean": 0.0012214952948852442, "clip_ratio/low_mean": 0.0017772459468687885, "clip_ratio/low_min": 0.00017800223031372298, "clip_ratio/region_mean": 0.0029987412490299903, "epoch": 0.12665301670079157, "grad_norm": 1.6749107837677002, "learning_rate": 1e-06, "loss": 0.0448, "step": 1357 }, { "clip_ratio/high_max": 0.003012486908119172, "clip_ratio/high_mean": 0.0012679977480729576, "clip_ratio/low_mean": 0.0014326255150081124, "clip_ratio/low_min": 4.9747318371373694e-05, "clip_ratio/region_mean": 0.0027006232921849005, "epoch": 0.12674634980079216, "grad_norm": 72151.25, "learning_rate": 1e-06, "loss": 11.6192, "step": 1358 }, { "clip_ratio/high_max": 0.0030121333329589106, "clip_ratio/high_mean": 0.0011580454811337404, "clip_ratio/low_mean": 0.0014632416095992085, "clip_ratio/low_min": 0.00010403153919469332, "clip_ratio/region_mean": 0.0026212870943709277, "epoch": 0.12683968290079276, "grad_norm": 1.1690666675567627, "learning_rate": 1e-06, "loss": 0.0251, "step": 1359 }, { "clip_ratio/high_max": 0.002762368691037409, "clip_ratio/high_mean": 0.0010374671510362532, "clip_ratio/low_mean": 0.0015640249112038873, "clip_ratio/low_min": 4.444766091182828e-05, "clip_ratio/region_mean": 0.0026014920804300345, "epoch": 0.12693301600079332, "grad_norm": 20.569284439086914, "learning_rate": 1e-06, "loss": 0.0465, "step": 1360 }, { "clip_ratio/high_max": 0.0029249489889480174, "clip_ratio/high_mean": 0.0012907175259897485, "clip_ratio/low_mean": 0.0015227844851324335, "clip_ratio/low_min": 0.00017174917775264475, "clip_ratio/region_mean": 0.002813502011122182, "epoch": 0.1270263491007939, "grad_norm": 94.8472671508789, "learning_rate": 1e-06, "loss": 0.003, "step": 1361 }, { "clip_ratio/high_max": 0.0026098782764165662, "clip_ratio/high_mean": 0.0010239554903819226, "clip_ratio/low_mean": 0.0017414901049050968, "clip_ratio/low_min": 8.76168196555227e-05, "clip_ratio/region_mean": 0.0027654455770971254, "epoch": 0.1271196822007945, "grad_norm": 2.6673460006713867, "learning_rate": 1e-06, "loss": 0.0349, "step": 1362 }, { "clip_ratio/high_max": 0.0024488610579282977, "clip_ratio/high_mean": 0.0010470156848896295, "clip_ratio/low_mean": 0.001575091591803357, "clip_ratio/low_min": 0.00016256840990536148, "clip_ratio/region_mean": 0.0026221072403131984, "epoch": 0.12721301530079507, "grad_norm": 0.13707396388053894, "learning_rate": 1e-06, "loss": 0.0218, "step": 1363 }, { "clip_ratio/high_max": 0.003056119261600543, "clip_ratio/high_mean": 0.0012559415445139166, "clip_ratio/low_mean": 0.0018413906182104256, "clip_ratio/low_min": 0.00017275308528041933, "clip_ratio/region_mean": 0.003097332126344554, "epoch": 0.12730634840079566, "grad_norm": 943148.9375, "learning_rate": 1e-06, "loss": 139.0204, "step": 1364 }, { "clip_ratio/high_max": 0.0023716850228083786, "clip_ratio/high_mean": 0.0010917439049080713, "clip_ratio/low_mean": 0.0017096003102778923, "clip_ratio/low_min": 0.0002219008101747022, "clip_ratio/region_mean": 0.0028013442424708046, "epoch": 0.12739968150079625, "grad_norm": 0.1299154907464981, "learning_rate": 1e-06, "loss": 0.0756, "step": 1365 }, { "clip_ratio/high_max": 0.002816854823322501, "clip_ratio/high_mean": 0.0011024784198525595, "clip_ratio/low_mean": 0.0016180742713913787, "clip_ratio/low_min": 0.00010820514580700547, "clip_ratio/region_mean": 0.0027205527294427156, "epoch": 0.12749301460079684, "grad_norm": 0.43317341804504395, "learning_rate": 1e-06, "loss": 0.0493, "step": 1366 }, { "clip_ratio/high_max": 0.0028227652510395274, "clip_ratio/high_mean": 0.001115926970669534, "clip_ratio/low_mean": 0.001455483681638725, "clip_ratio/low_min": 0.00017691719222057145, "clip_ratio/region_mean": 0.0025714106304803863, "epoch": 0.1275863477007974, "grad_norm": 87.6314697265625, "learning_rate": 1e-06, "loss": 0.0156, "step": 1367 }, { "clip_ratio/high_max": 0.002606844100228045, "clip_ratio/high_mean": 0.0011581685084820492, "clip_ratio/low_mean": 0.0013427623871393735, "clip_ratio/low_min": 2.098417462548241e-05, "clip_ratio/region_mean": 0.002500930888345465, "epoch": 0.127679680800798, "grad_norm": 1070412.625, "learning_rate": 1e-06, "loss": 152.3054, "step": 1368 }, { "clip_ratio/high_max": 0.00316045150248101, "clip_ratio/high_mean": 0.0012049879032929312, "clip_ratio/low_mean": 0.0017226222225872334, "clip_ratio/low_min": 9.289862737205112e-05, "clip_ratio/region_mean": 0.002927610090409871, "epoch": 0.1277730139007986, "grad_norm": 0.2591453492641449, "learning_rate": 1e-06, "loss": -0.0184, "step": 1369 }, { "clip_ratio/high_max": 0.002523561670386698, "clip_ratio/high_mean": 0.0010778622345242184, "clip_ratio/low_mean": 0.001299512183322804, "clip_ratio/low_min": 0.00013137760470272042, "clip_ratio/region_mean": 0.0023773744542268105, "epoch": 0.12786634700079916, "grad_norm": 0.27729931473731995, "learning_rate": 1e-06, "loss": 0.0119, "step": 1370 }, { "clip_ratio/high_max": 0.0026265912674716674, "clip_ratio/high_mean": 0.0012920482076879125, "clip_ratio/low_mean": 0.001608399448741693, "clip_ratio/low_min": 0.00014944261329219444, "clip_ratio/region_mean": 0.00290044770372333, "epoch": 0.12795968010079975, "grad_norm": 1.5196881294250488, "learning_rate": 1e-06, "loss": 0.0059, "step": 1371 }, { "clip_ratio/high_max": 0.0029461203594109975, "clip_ratio/high_mean": 0.0011058318050345406, "clip_ratio/low_mean": 0.0012287265199120156, "clip_ratio/low_min": 3.429375647101551e-05, "clip_ratio/region_mean": 0.0023345582667388953, "epoch": 0.12805301320080034, "grad_norm": 0.14407366514205933, "learning_rate": 1e-06, "loss": -0.0008, "step": 1372 }, { "clip_ratio/high_max": 0.0024220183040597476, "clip_ratio/high_mean": 0.0010868157805816736, "clip_ratio/low_mean": 0.001566384562465828, "clip_ratio/low_min": 0.0001373180148220854, "clip_ratio/region_mean": 0.002653200368513353, "epoch": 0.1281463463008009, "grad_norm": 139069632.0, "learning_rate": 1e-06, "loss": 95620.7578, "step": 1373 }, { "clip_ratio/high_max": 0.0026553634816082194, "clip_ratio/high_mean": 0.0009497580504103098, "clip_ratio/low_mean": 0.0014684297348139808, "clip_ratio/low_min": 2.9056252969894558e-05, "clip_ratio/region_mean": 0.002418187810690142, "epoch": 0.1282396794008015, "grad_norm": 70330.828125, "learning_rate": 1e-06, "loss": 36.8409, "step": 1374 }, { "clip_ratio/high_max": 0.0026666441044653766, "clip_ratio/high_mean": 0.001082396433048416, "clip_ratio/low_mean": 0.0014729111499036662, "clip_ratio/low_min": 8.50972137413919e-05, "clip_ratio/region_mean": 0.0025553076120559126, "epoch": 0.1283330125008021, "grad_norm": 3025.2099609375, "learning_rate": 1e-06, "loss": 2.3512, "step": 1375 }, { "clip_ratio/high_max": 0.0024378779708058573, "clip_ratio/high_mean": 0.0009895453240460483, "clip_ratio/low_mean": 0.0015612897150276694, "clip_ratio/low_min": 0.000215641972317826, "clip_ratio/region_mean": 0.002550835022702813, "epoch": 0.12842634560080265, "grad_norm": 22.265060424804688, "learning_rate": 1e-06, "loss": 0.0333, "step": 1376 }, { "clip_ratio/high_max": 0.002777752553811297, "clip_ratio/high_mean": 0.0011693892483890522, "clip_ratio/low_mean": 0.0014943634250812465, "clip_ratio/low_min": 0.00020059328380739316, "clip_ratio/region_mean": 0.002663752718945034, "epoch": 0.12851967870080325, "grad_norm": 11.997663497924805, "learning_rate": 1e-06, "loss": 0.0517, "step": 1377 }, { "clip_ratio/high_max": 0.0027311431476846337, "clip_ratio/high_mean": 0.0010842281244549667, "clip_ratio/low_mean": 0.0015534753147221636, "clip_ratio/low_min": 0.0002039544087892864, "clip_ratio/region_mean": 0.002637703437358141, "epoch": 0.12861301180080384, "grad_norm": 187.4585723876953, "learning_rate": 1e-06, "loss": 0.0918, "step": 1378 }, { "clip_ratio/high_max": 0.002413547452306375, "clip_ratio/high_mean": 0.001022085982185672, "clip_ratio/low_mean": 0.0014121076092123985, "clip_ratio/low_min": 0.00010598198969091754, "clip_ratio/region_mean": 0.0024341936223208904, "epoch": 0.1287063449008044, "grad_norm": 8386586112.0, "learning_rate": 1e-06, "loss": 1437457.5, "step": 1379 }, { "clip_ratio/high_max": 0.002541276342526544, "clip_ratio/high_mean": 0.0009673101103544468, "clip_ratio/low_mean": 0.00158500838369946, "clip_ratio/low_min": 0.00016196291835512966, "clip_ratio/region_mean": 0.002552318495872896, "epoch": 0.128799678000805, "grad_norm": 606.2467041015625, "learning_rate": 1e-06, "loss": 2.1694, "step": 1380 }, { "clip_ratio/high_max": 0.0026504655979806557, "clip_ratio/high_mean": 0.0011263451779086608, "clip_ratio/low_mean": 0.001498097237345064, "clip_ratio/low_min": 5.3306962399801705e-05, "clip_ratio/region_mean": 0.0026244423715979792, "epoch": 0.1288930111008056, "grad_norm": 0.2823176980018616, "learning_rate": 1e-06, "loss": 0.0404, "step": 1381 }, { "clip_ratio/high_max": 0.0026424962052260526, "clip_ratio/high_mean": 0.0010761064331745729, "clip_ratio/low_mean": 0.0016086902796814684, "clip_ratio/low_min": 0.0001325031516898889, "clip_ratio/region_mean": 0.00268479671649402, "epoch": 0.12898634420080615, "grad_norm": 1.0655531883239746, "learning_rate": 1e-06, "loss": 0.0143, "step": 1382 }, { "clip_ratio/high_max": 0.0031903236158541404, "clip_ratio/high_mean": 0.0012527883736765943, "clip_ratio/low_mean": 0.0016746588298701681, "clip_ratio/low_min": 0.00013047294760326622, "clip_ratio/region_mean": 0.002927447247202508, "epoch": 0.12907967730080674, "grad_norm": 63.9605598449707, "learning_rate": 1e-06, "loss": -0.0016, "step": 1383 }, { "clip_ratio/high_max": 0.0027157221556990407, "clip_ratio/high_mean": 0.0010691424686228856, "clip_ratio/low_mean": 0.0014518251446133945, "clip_ratio/low_min": 3.35815666403505e-05, "clip_ratio/region_mean": 0.002520967638702132, "epoch": 0.12917301040080734, "grad_norm": 0.1845264732837677, "learning_rate": 1e-06, "loss": 0.0011, "step": 1384 }, { "clip_ratio/high_max": 0.0026524175045778975, "clip_ratio/high_mean": 0.001186581972433487, "clip_ratio/low_mean": 0.0014880025264574215, "clip_ratio/low_min": 0.00012023159797536209, "clip_ratio/region_mean": 0.0026745844807010144, "epoch": 0.1292663435008079, "grad_norm": 31.096954345703125, "learning_rate": 1e-06, "loss": 0.0244, "step": 1385 }, { "clip_ratio/high_max": 0.0027504787212819792, "clip_ratio/high_mean": 0.0010514798777876422, "clip_ratio/low_mean": 0.0018460640567354858, "clip_ratio/low_min": 0.0001360344631393673, "clip_ratio/region_mean": 0.002897543956351001, "epoch": 0.1293596766008085, "grad_norm": 1.6837533712387085, "learning_rate": 1e-06, "loss": 0.0118, "step": 1386 }, { "clip_ratio/high_max": 0.002441510689095594, "clip_ratio/high_mean": 0.0011561646861082409, "clip_ratio/low_mean": 0.0017586114117875695, "clip_ratio/low_min": 9.969240636564791e-05, "clip_ratio/region_mean": 0.002914776101533789, "epoch": 0.12945300970080909, "grad_norm": 3.9044623374938965, "learning_rate": 1e-06, "loss": 0.0182, "step": 1387 }, { "clip_ratio/high_max": 0.0031801716177142225, "clip_ratio/high_mean": 0.0012834373774239793, "clip_ratio/low_mean": 0.0015878853773756418, "clip_ratio/low_min": 0.00019396785864955746, "clip_ratio/region_mean": 0.002871322722057812, "epoch": 0.12954634280080968, "grad_norm": 0.21645905077457428, "learning_rate": 1e-06, "loss": 0.0061, "step": 1388 }, { "clip_ratio/high_max": 0.0024777334183454514, "clip_ratio/high_mean": 0.0009807090937101748, "clip_ratio/low_mean": 0.0016138226201292127, "clip_ratio/low_min": 0.0001415236147295218, "clip_ratio/region_mean": 0.0025945317174773663, "epoch": 0.12963967590081024, "grad_norm": 0.13816550374031067, "learning_rate": 1e-06, "loss": 0.0012, "step": 1389 }, { "clip_ratio/high_max": 0.002441591554088518, "clip_ratio/high_mean": 0.0009266625202144496, "clip_ratio/low_mean": 0.001571533088281285, "clip_ratio/low_min": 0.00013892288006900344, "clip_ratio/region_mean": 0.00249819562304765, "epoch": 0.12973300900081083, "grad_norm": 1.4158962965011597, "learning_rate": 1e-06, "loss": 0.0545, "step": 1390 }, { "clip_ratio/high_max": 0.002397955147898756, "clip_ratio/high_mean": 0.0011183817532582907, "clip_ratio/low_mean": 0.001571245211380301, "clip_ratio/low_min": 8.463525773549918e-05, "clip_ratio/region_mean": 0.0026896269628196023, "epoch": 0.12982634210081143, "grad_norm": 91517.6328125, "learning_rate": 1e-06, "loss": 15.2876, "step": 1391 }, { "clip_ratio/high_max": 0.002460081028402783, "clip_ratio/high_mean": 0.0010267010075040162, "clip_ratio/low_mean": 0.001574430429172935, "clip_ratio/low_min": 8.699306454218458e-05, "clip_ratio/region_mean": 0.002601131403935142, "epoch": 0.129919675200812, "grad_norm": 21.62727165222168, "learning_rate": 1e-06, "loss": 0.1577, "step": 1392 }, { "clip_ratio/high_max": 0.003137223502562847, "clip_ratio/high_mean": 0.0013424749340629205, "clip_ratio/low_mean": 0.0013437964735203423, "clip_ratio/low_min": 8.148366759996861e-05, "clip_ratio/region_mean": 0.0026862713712034747, "epoch": 0.13001300830081258, "grad_norm": 0.16642116010189056, "learning_rate": 1e-06, "loss": -0.0672, "step": 1393 }, { "clip_ratio/high_max": 0.002282716552144848, "clip_ratio/high_mean": 0.000946288801060291, "clip_ratio/low_mean": 0.0015198332057480002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002466122015903238, "epoch": 0.13010634140081317, "grad_norm": 196.65061950683594, "learning_rate": 1e-06, "loss": 0.1428, "step": 1394 }, { "clip_ratio/high_max": 0.002296278595167678, "clip_ratio/high_mean": 0.0010135039028682513, "clip_ratio/low_mean": 0.0017669905537331942, "clip_ratio/low_min": 0.00018960191118821967, "clip_ratio/region_mean": 0.0027804944620584138, "epoch": 0.13019967450081374, "grad_norm": 194.7013397216797, "learning_rate": 1e-06, "loss": 5.4421, "step": 1395 }, { "clip_ratio/high_max": 0.002409439483017195, "clip_ratio/high_mean": 0.0009610891229385743, "clip_ratio/low_mean": 0.0016240528420894407, "clip_ratio/low_min": 7.231796007545199e-05, "clip_ratio/region_mean": 0.002585142028692644, "epoch": 0.13029300760081433, "grad_norm": 2.2608273029327393, "learning_rate": 1e-06, "loss": 0.041, "step": 1396 }, { "clip_ratio/high_max": 0.0026952748667099513, "clip_ratio/high_mean": 0.0011247053880651947, "clip_ratio/low_mean": 0.001473238691687584, "clip_ratio/low_min": 0.00015692210581619292, "clip_ratio/region_mean": 0.0025979440397350118, "epoch": 0.13038634070081492, "grad_norm": 0.16640086472034454, "learning_rate": 1e-06, "loss": 0.0156, "step": 1397 }, { "clip_ratio/high_max": 0.003061710885958746, "clip_ratio/high_mean": 0.0012513508154370356, "clip_ratio/low_mean": 0.0013228721436462365, "clip_ratio/low_min": 7.185409594967496e-05, "clip_ratio/region_mean": 0.002574222940893378, "epoch": 0.1304796738008155, "grad_norm": 0.11927152425050735, "learning_rate": 1e-06, "loss": -0.0245, "step": 1398 }, { "clip_ratio/high_max": 0.0026412271326989867, "clip_ratio/high_mean": 0.001060487840732094, "clip_ratio/low_mean": 0.001678478638496017, "clip_ratio/low_min": 0.00016108862564578885, "clip_ratio/region_mean": 0.0027389664537622593, "epoch": 0.13057300690081608, "grad_norm": 0.13923116028308868, "learning_rate": 1e-06, "loss": 0.013, "step": 1399 }, { "clip_ratio/high_max": 0.002258217915368732, "clip_ratio/high_mean": 0.0009664555927884066, "clip_ratio/low_mean": 0.0017232697264262242, "clip_ratio/low_min": 0.00017557459250383545, "clip_ratio/region_mean": 0.002689725282834843, "epoch": 0.13066634000081667, "grad_norm": 0.13826005160808563, "learning_rate": 1e-06, "loss": 0.0461, "step": 1400 }, { "clip_ratio/high_max": 0.002481078823620919, "clip_ratio/high_mean": 0.0010340552380512236, "clip_ratio/low_mean": 0.0016081076209957246, "clip_ratio/low_min": 0.00019811754646070767, "clip_ratio/region_mean": 0.002642162929987535, "epoch": 0.13075967310081724, "grad_norm": 241.93060302734375, "learning_rate": 1e-06, "loss": 0.1939, "step": 1401 }, { "clip_ratio/high_max": 0.0024730574514251202, "clip_ratio/high_mean": 0.0010984615983034018, "clip_ratio/low_mean": 0.0016005689685698599, "clip_ratio/low_min": 0.00011741509752027923, "clip_ratio/region_mean": 0.0026990305559593253, "epoch": 0.13085300620081783, "grad_norm": 135.19520568847656, "learning_rate": 1e-06, "loss": 0.0128, "step": 1402 }, { "clip_ratio/high_max": 0.002391772115515778, "clip_ratio/high_mean": 0.0010481422850716626, "clip_ratio/low_mean": 0.0015887210793152917, "clip_ratio/low_min": 0.00016959891945589334, "clip_ratio/region_mean": 0.002636863384395838, "epoch": 0.13094633930081842, "grad_norm": 9.176450729370117, "learning_rate": 1e-06, "loss": 0.0053, "step": 1403 }, { "clip_ratio/high_max": 0.0026769865544338245, "clip_ratio/high_mean": 0.0011089461295341607, "clip_ratio/low_mean": 0.0015980123007466318, "clip_ratio/low_min": 0.00013346876494324533, "clip_ratio/region_mean": 0.0027069584393757395, "epoch": 0.13103967240081899, "grad_norm": 41.052024841308594, "learning_rate": 1e-06, "loss": 0.0138, "step": 1404 }, { "clip_ratio/high_max": 0.002473982262017671, "clip_ratio/high_mean": 0.0010532237156439805, "clip_ratio/low_mean": 0.0015532347970292903, "clip_ratio/low_min": 0.00014090799231780693, "clip_ratio/region_mean": 0.0026064585326821543, "epoch": 0.13113300550081958, "grad_norm": 0.1330929696559906, "learning_rate": 1e-06, "loss": 0.018, "step": 1405 }, { "clip_ratio/high_max": 0.0027944278554059565, "clip_ratio/high_mean": 0.001026321348035708, "clip_ratio/low_mean": 0.0016032019921112806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002629523391078692, "epoch": 0.13122633860082017, "grad_norm": 2934.9677734375, "learning_rate": 1e-06, "loss": 0.1599, "step": 1406 }, { "clip_ratio/high_max": 0.002365184911468532, "clip_ratio/high_mean": 0.0010893745293287793, "clip_ratio/low_mean": 0.0017818395463109482, "clip_ratio/low_min": 0.00015969385640346445, "clip_ratio/region_mean": 0.002871214077458717, "epoch": 0.13131967170082076, "grad_norm": 0.5821436047554016, "learning_rate": 1e-06, "loss": 0.0597, "step": 1407 }, { "clip_ratio/high_max": 0.0026922695251414552, "clip_ratio/high_mean": 0.001224003248353256, "clip_ratio/low_mean": 0.0013902223563491134, "clip_ratio/low_min": 1.6684463844285347e-05, "clip_ratio/region_mean": 0.0026142256247112527, "epoch": 0.13141300480082133, "grad_norm": 0.3245605528354645, "learning_rate": 1e-06, "loss": 0.0018, "step": 1408 }, { "clip_ratio/high_max": 0.0026298124212189578, "clip_ratio/high_mean": 0.001025515051878756, "clip_ratio/low_mean": 0.001735656609525904, "clip_ratio/low_min": 0.00013841494001098908, "clip_ratio/region_mean": 0.002761171585007105, "epoch": 0.13150633790082192, "grad_norm": 273.7525634765625, "learning_rate": 1e-06, "loss": 0.0941, "step": 1409 }, { "clip_ratio/high_max": 0.0024033721529121976, "clip_ratio/high_mean": 0.0009953529170161346, "clip_ratio/low_mean": 0.0017387284606229514, "clip_ratio/low_min": 0.00028151608785265125, "clip_ratio/region_mean": 0.0027340813976479694, "epoch": 0.1315996710008225, "grad_norm": 0.130961075425148, "learning_rate": 1e-06, "loss": 0.0631, "step": 1410 }, { "clip_ratio/high_max": 0.00272478587430669, "clip_ratio/high_mean": 0.0011675254609144758, "clip_ratio/low_mean": 0.0015568342460028362, "clip_ratio/low_min": 7.283431114046834e-05, "clip_ratio/region_mean": 0.0027243596559856087, "epoch": 0.13169300410082307, "grad_norm": 0.40550360083580017, "learning_rate": 1e-06, "loss": 0.0042, "step": 1411 }, { "clip_ratio/high_max": 0.002548497348470846, "clip_ratio/high_mean": 0.0011468323955341475, "clip_ratio/low_mean": 0.001566671908221906, "clip_ratio/low_min": 5.344206510926597e-05, "clip_ratio/region_mean": 0.0027135042619192973, "epoch": 0.13178633720082367, "grad_norm": 0.12724648416042328, "learning_rate": 1e-06, "loss": -0.0104, "step": 1412 }, { "clip_ratio/high_max": 0.002627470181323588, "clip_ratio/high_mean": 0.0011432980918471003, "clip_ratio/low_mean": 0.0016754151874920353, "clip_ratio/low_min": 0.0001339665086561581, "clip_ratio/region_mean": 0.002818713299348019, "epoch": 0.13187967030082426, "grad_norm": 0.7243697643280029, "learning_rate": 1e-06, "loss": -0.0088, "step": 1413 }, { "clip_ratio/high_max": 0.002435280752251856, "clip_ratio/high_mean": 0.0009853520496108104, "clip_ratio/low_mean": 0.0017027171343215741, "clip_ratio/low_min": 5.3409672545967624e-05, "clip_ratio/region_mean": 0.002688069245778024, "epoch": 0.13197300340082482, "grad_norm": 188.37423706054688, "learning_rate": 1e-06, "loss": 0.0672, "step": 1414 }, { "clip_ratio/high_max": 0.002681160916836234, "clip_ratio/high_mean": 0.0009766693292476702, "clip_ratio/low_mean": 0.0016269573861791287, "clip_ratio/low_min": 9.762062836671248e-05, "clip_ratio/region_mean": 0.002603626729978714, "epoch": 0.13206633650082542, "grad_norm": 0.12333964556455612, "learning_rate": 1e-06, "loss": 0.0334, "step": 1415 }, { "clip_ratio/high_max": 0.0024466469330945984, "clip_ratio/high_mean": 0.0010013362916652113, "clip_ratio/low_mean": 0.0017507261072751135, "clip_ratio/low_min": 0.00011843363972730003, "clip_ratio/region_mean": 0.0027520624425960705, "epoch": 0.132159669600826, "grad_norm": 0.20634008944034576, "learning_rate": 1e-06, "loss": 0.0051, "step": 1416 }, { "clip_ratio/high_max": 0.0029374447258305736, "clip_ratio/high_mean": 0.0011167653356096707, "clip_ratio/low_mean": 0.0018540215751272626, "clip_ratio/low_min": 0.0002036642617895268, "clip_ratio/region_mean": 0.00297078685980523, "epoch": 0.13225300270082657, "grad_norm": 13.281930923461914, "learning_rate": 1e-06, "loss": 0.0289, "step": 1417 }, { "clip_ratio/high_max": 0.0028347576735541224, "clip_ratio/high_mean": 0.0012266089506738354, "clip_ratio/low_mean": 0.0016293151456920896, "clip_ratio/low_min": 7.572861431981437e-05, "clip_ratio/region_mean": 0.0028559242127812468, "epoch": 0.13234633580082716, "grad_norm": 258.9142150878906, "learning_rate": 1e-06, "loss": 30.6861, "step": 1418 }, { "clip_ratio/high_max": 0.002383655904850457, "clip_ratio/high_mean": 0.0008805097913864302, "clip_ratio/low_mean": 0.001709411561023444, "clip_ratio/low_min": 0.00018154647023038706, "clip_ratio/region_mean": 0.0025899213724187575, "epoch": 0.13243966890082776, "grad_norm": 430.251708984375, "learning_rate": 1e-06, "loss": 1241.2106, "step": 1419 }, { "clip_ratio/high_max": 0.002969776847749017, "clip_ratio/high_mean": 0.001251856370799942, "clip_ratio/low_mean": 0.001433284724043915, "clip_ratio/low_min": 9.145713556790724e-05, "clip_ratio/region_mean": 0.002685141036636196, "epoch": 0.13253300200082832, "grad_norm": 8.915518760681152, "learning_rate": 1e-06, "loss": -0.0118, "step": 1420 }, { "clip_ratio/high_max": 0.0028583743842318654, "clip_ratio/high_mean": 0.0011785467941081151, "clip_ratio/low_mean": 0.0017261717221117578, "clip_ratio/low_min": 0.00017226733416464413, "clip_ratio/region_mean": 0.002904718538047746, "epoch": 0.1326263351008289, "grad_norm": 0.11725408583879471, "learning_rate": 1e-06, "loss": 0.0372, "step": 1421 }, { "clip_ratio/high_max": 0.002540175693866331, "clip_ratio/high_mean": 0.001076865206414368, "clip_ratio/low_mean": 0.0015768845187267289, "clip_ratio/low_min": 0.00019682511810970027, "clip_ratio/region_mean": 0.002653749739693012, "epoch": 0.1327196682008295, "grad_norm": 0.12659770250320435, "learning_rate": 1e-06, "loss": -0.0007, "step": 1422 }, { "clip_ratio/high_max": 0.002712263914872892, "clip_ratio/high_mean": 0.0010842850024346262, "clip_ratio/low_mean": 0.0018126404065696988, "clip_ratio/low_min": 0.00020109376782784238, "clip_ratio/region_mean": 0.002896925463574007, "epoch": 0.13281300130083007, "grad_norm": 1034.2421875, "learning_rate": 1e-06, "loss": 1.2496, "step": 1423 }, { "clip_ratio/high_max": 0.0024864735460141674, "clip_ratio/high_mean": 0.0010395293647889048, "clip_ratio/low_mean": 0.0016647437805659138, "clip_ratio/low_min": 0.0002000084441533545, "clip_ratio/region_mean": 0.002704273138078861, "epoch": 0.13290633440083066, "grad_norm": 0.10571229457855225, "learning_rate": 1e-06, "loss": 0.0154, "step": 1424 }, { "clip_ratio/high_max": 0.0022296260176517535, "clip_ratio/high_mean": 0.00095350923766091, "clip_ratio/low_mean": 0.001687831652816385, "clip_ratio/low_min": 0.00012180322664789855, "clip_ratio/region_mean": 0.002641340885020327, "epoch": 0.13299966750083125, "grad_norm": 187.03111267089844, "learning_rate": 1e-06, "loss": 0.0672, "step": 1425 }, { "clip_ratio/high_max": 0.0029540174291469157, "clip_ratio/high_mean": 0.0011986405188508797, "clip_ratio/low_mean": 0.0019007343289558776, "clip_ratio/low_min": 0.00029376470592978876, "clip_ratio/region_mean": 0.0030993748805485666, "epoch": 0.13309300060083182, "grad_norm": 172669.859375, "learning_rate": 1e-06, "loss": 62.844, "step": 1426 }, { "clip_ratio/high_max": 0.0025557678382028826, "clip_ratio/high_mean": 0.0011500860382511746, "clip_ratio/low_mean": 0.0017583253975317348, "clip_ratio/low_min": 0.00010957470021821791, "clip_ratio/region_mean": 0.00290841146488674, "epoch": 0.1331863337008324, "grad_norm": 0.1349499225616455, "learning_rate": 1e-06, "loss": 0.0208, "step": 1427 }, { "clip_ratio/high_max": 0.002436662296531722, "clip_ratio/high_mean": 0.0010106390072905924, "clip_ratio/low_mean": 0.0019087325272266753, "clip_ratio/low_min": 0.00024005529121495783, "clip_ratio/region_mean": 0.002919371450843755, "epoch": 0.133279666800833, "grad_norm": 5.4312357902526855, "learning_rate": 1e-06, "loss": 0.0135, "step": 1428 }, { "clip_ratio/high_max": 0.0026954376662615687, "clip_ratio/high_mean": 0.0010204571335634682, "clip_ratio/low_mean": 0.0016871898478711955, "clip_ratio/low_min": 0.0001616558647583588, "clip_ratio/region_mean": 0.002707646941416897, "epoch": 0.1333729999008336, "grad_norm": 0.21249426901340485, "learning_rate": 1e-06, "loss": 0.0177, "step": 1429 }, { "clip_ratio/high_max": 0.0027993397307000123, "clip_ratio/high_mean": 0.0010981743853335502, "clip_ratio/low_mean": 0.0018174365832237527, "clip_ratio/low_min": 0.00013034590028837556, "clip_ratio/region_mean": 0.0029156109812902287, "epoch": 0.13346633300083416, "grad_norm": 0.8861382603645325, "learning_rate": 1e-06, "loss": 0.0418, "step": 1430 }, { "clip_ratio/high_max": 0.0024428457763860933, "clip_ratio/high_mean": 0.0009711748080007965, "clip_ratio/low_mean": 0.001741700998536544, "clip_ratio/low_min": 9.761672117747366e-05, "clip_ratio/region_mean": 0.0027128758156322874, "epoch": 0.13355966610083475, "grad_norm": 1.3219248056411743, "learning_rate": 1e-06, "loss": 0.0244, "step": 1431 }, { "clip_ratio/high_max": 0.0019408474472584203, "clip_ratio/high_mean": 0.0008184157995856367, "clip_ratio/low_mean": 0.0018007251710514538, "clip_ratio/low_min": 0.00020190087616356323, "clip_ratio/region_mean": 0.002619140919705387, "epoch": 0.13365299920083534, "grad_norm": 0.1533147692680359, "learning_rate": 1e-06, "loss": 0.0706, "step": 1432 }, { "clip_ratio/high_max": 0.002205795761256013, "clip_ratio/high_mean": 0.0010021921298175585, "clip_ratio/low_mean": 0.0017404344835085794, "clip_ratio/low_min": 0.00016646071890136227, "clip_ratio/region_mean": 0.00274262666789582, "epoch": 0.1337463323008359, "grad_norm": 39.03219223022461, "learning_rate": 1e-06, "loss": 0.306, "step": 1433 }, { "clip_ratio/high_max": 0.0025487060920568183, "clip_ratio/high_mean": 0.0009893514761643019, "clip_ratio/low_mean": 0.0020536193151201587, "clip_ratio/low_min": 0.00024965910233731847, "clip_ratio/region_mean": 0.003042970798560418, "epoch": 0.1338396654008365, "grad_norm": 1.1089484691619873, "learning_rate": 1e-06, "loss": 0.0681, "step": 1434 }, { "clip_ratio/high_max": 0.0021889104828005657, "clip_ratio/high_mean": 0.0008505442274326924, "clip_ratio/low_mean": 0.001904676879348699, "clip_ratio/low_min": 0.00010199920689046849, "clip_ratio/region_mean": 0.0027552211395232007, "epoch": 0.1339329985008371, "grad_norm": 1933.003173828125, "learning_rate": 1e-06, "loss": 0.6202, "step": 1435 }, { "clip_ratio/high_max": 0.002621724648633972, "clip_ratio/high_mean": 0.0011468651209725067, "clip_ratio/low_mean": 0.0014508028034470044, "clip_ratio/low_min": 5.736778439313639e-05, "clip_ratio/region_mean": 0.002597667851659935, "epoch": 0.13402633160083766, "grad_norm": 6259031040.0, "learning_rate": 1e-06, "loss": 2310289.0, "step": 1436 }, { "clip_ratio/high_max": 0.002287137584062293, "clip_ratio/high_mean": 0.0010277461678924737, "clip_ratio/low_mean": 0.0021229051126283593, "clip_ratio/low_min": 0.0002761772830126574, "clip_ratio/region_mean": 0.0031506513405474834, "epoch": 0.13411966470083825, "grad_norm": 0.13045425713062286, "learning_rate": 1e-06, "loss": 0.0571, "step": 1437 }, { "clip_ratio/high_max": 0.002381912992859725, "clip_ratio/high_mean": 0.000982425701295142, "clip_ratio/low_mean": 0.0017085306899389252, "clip_ratio/low_min": 0.00011801501659647329, "clip_ratio/region_mean": 0.002690956367587205, "epoch": 0.13421299780083884, "grad_norm": 0.2502935230731964, "learning_rate": 1e-06, "loss": 0.0496, "step": 1438 }, { "clip_ratio/high_max": 0.002949854140751995, "clip_ratio/high_mean": 0.0011462210241006687, "clip_ratio/low_mean": 0.001587361217389116, "clip_ratio/low_min": 7.671535968256649e-06, "clip_ratio/region_mean": 0.0027335822232998908, "epoch": 0.1343063309008394, "grad_norm": 0.7111345529556274, "learning_rate": 1e-06, "loss": -0.0266, "step": 1439 }, { "clip_ratio/high_max": 0.002485919336322695, "clip_ratio/high_mean": 0.00097697609635361, "clip_ratio/low_mean": 0.0017890233757498208, "clip_ratio/low_min": 0.00010785142330860253, "clip_ratio/region_mean": 0.0027659994520945475, "epoch": 0.13439966400084, "grad_norm": 6.836536884307861, "learning_rate": 1e-06, "loss": 0.0736, "step": 1440 }, { "clip_ratio/high_max": 0.0023693528601143043, "clip_ratio/high_mean": 0.0011060235829063458, "clip_ratio/low_mean": 0.0012804510733985808, "clip_ratio/low_min": 0.00015452839943463914, "clip_ratio/region_mean": 0.002386474618106149, "completions/clipped_ratio": 0.012965611049107095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 607.1629028320312, "completions/mean_terminated_length": 561.3338012695312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.1344929971008406, "grad_norm": 541003743232.0, "learning_rate": 1e-06, "loss": 1099726443249664.0, "num_tokens": 1055091901.0, "reward": 0.57861328125, "reward_std": 0.18524903059005737, "rewards/simpleverify_reward/mean": 0.57861328125, "rewards/simpleverify_reward/std": 0.49378326535224915, "step": 1441 }, { "clip_ratio/high_max": 0.002696283219847828, "clip_ratio/high_mean": 0.0011504272733873222, "clip_ratio/low_mean": 0.0012075958911736961, "clip_ratio/low_min": 0.00012501117134888773, "clip_ratio/region_mean": 0.002358023150009103, "epoch": 0.13458633020084115, "grad_norm": 6609274667008.0, "learning_rate": 1e-06, "loss": 6838346240.0, "step": 1442 }, { "clip_ratio/high_max": 0.003130032360786572, "clip_ratio/high_mean": 0.0013046305994066643, "clip_ratio/low_mean": 0.000954624874793808, "clip_ratio/low_min": 6.335805119306315e-05, "clip_ratio/region_mean": 0.002259255496028345, "epoch": 0.13467966330084175, "grad_norm": 12655656.0, "learning_rate": 1e-06, "loss": 12854152396800.0, "step": 1443 }, { "clip_ratio/high_max": 0.0033774690455175005, "clip_ratio/high_mean": 0.0013816131759085692, "clip_ratio/low_mean": 0.001182761898235185, "clip_ratio/low_min": 7.217589336505625e-05, "clip_ratio/region_mean": 0.0025643750486779027, "epoch": 0.13477299640084234, "grad_norm": 200055415898112.0, "learning_rate": 1e-06, "loss": 12370237440.0, "step": 1444 }, { "clip_ratio/high_max": 0.00291202831431292, "clip_ratio/high_mean": 0.0011468935408629477, "clip_ratio/low_mean": 0.0011852568186441204, "clip_ratio/low_min": 6.471354026871268e-05, "clip_ratio/region_mean": 0.0023321503758779727, "epoch": 0.1348663295008429, "grad_norm": 23144734720.0, "learning_rate": 1e-06, "loss": 376667893661696.0, "step": 1445 }, { "clip_ratio/high_max": 0.003294552559964359, "clip_ratio/high_mean": 0.0013599780722870491, "clip_ratio/low_mean": 0.0011681275645969436, "clip_ratio/low_min": 0.00012788169988198206, "clip_ratio/region_mean": 0.00252810561505612, "epoch": 0.1349596626008435, "grad_norm": 3792513024.0, "learning_rate": 1e-06, "loss": 31824876.0, "step": 1446 }, { "clip_ratio/high_max": 0.002593165307189338, "clip_ratio/high_mean": 0.0011639173353614751, "clip_ratio/low_mean": 0.0013022872735746205, "clip_ratio/low_min": 2.303722794749774e-05, "clip_ratio/region_mean": 0.002466204605298117, "epoch": 0.1350529957008441, "grad_norm": 79509692874752.0, "learning_rate": 1e-06, "loss": 1581861044224.0, "step": 1447 }, { "clip_ratio/high_max": 0.0027249893319094554, "clip_ratio/high_mean": 0.0011779220258176792, "clip_ratio/low_mean": 0.0012073728757968638, "clip_ratio/low_min": 0.00012239763236721046, "clip_ratio/region_mean": 0.002385294937994331, "epoch": 0.13514632880084468, "grad_norm": 1784650268672.0, "learning_rate": 1e-06, "loss": 198344304.0, "step": 1448 }, { "clip_ratio/high_max": 0.0027382071421016008, "clip_ratio/high_mean": 0.0011431629318394698, "clip_ratio/low_mean": 0.0012400315317790955, "clip_ratio/low_min": 0.000131476705064415, "clip_ratio/region_mean": 0.002383194470894523, "epoch": 0.13523966190084524, "grad_norm": 1246192599040.0, "learning_rate": 1e-06, "loss": 530943756468224.0, "step": 1449 }, { "clip_ratio/high_max": 0.002934131698566489, "clip_ratio/high_mean": 0.0010814723063958809, "clip_ratio/low_mean": 0.001010956906611682, "clip_ratio/low_min": 4.7995610657380894e-05, "clip_ratio/region_mean": 0.0020924291893607005, "epoch": 0.13533299500084583, "grad_norm": 1249203453952.0, "learning_rate": 1e-06, "loss": 163251232.0, "step": 1450 }, { "clip_ratio/high_max": 0.002858911029761657, "clip_ratio/high_mean": 0.001094266652216902, "clip_ratio/low_mean": 0.0010532965115999104, "clip_ratio/low_min": 0.00013665326332557015, "clip_ratio/region_mean": 0.0021475632238434628, "epoch": 0.13542632810084643, "grad_norm": 515118.46875, "learning_rate": 1e-06, "loss": 16807.9004, "step": 1451 }, { "clip_ratio/high_max": 0.0030716607507201843, "clip_ratio/high_mean": 0.0011527667884365655, "clip_ratio/low_mean": 0.0012555806224554544, "clip_ratio/low_min": 0.00015277917191269808, "clip_ratio/region_mean": 0.0024083474272629246, "epoch": 0.135519661200847, "grad_norm": 38709356.0, "learning_rate": 1e-06, "loss": 330850048.0, "step": 1452 }, { "clip_ratio/high_max": 0.003258261211158242, "clip_ratio/high_mean": 0.0013011778755753767, "clip_ratio/low_mean": 0.00134764647737029, "clip_ratio/low_min": 8.870191777532455e-05, "clip_ratio/region_mean": 0.0026488243529456668, "epoch": 0.13561299430084758, "grad_norm": 761610560.0, "learning_rate": 1e-06, "loss": 777385.25, "step": 1453 }, { "clip_ratio/high_max": 0.0023756955488352105, "clip_ratio/high_mean": 0.000998451028863201, "clip_ratio/low_mean": 0.001275668077141745, "clip_ratio/low_min": 0.0001602807387826033, "clip_ratio/region_mean": 0.0022741190914530307, "epoch": 0.13570632740084818, "grad_norm": 294116736.0, "learning_rate": 1e-06, "loss": 286564.4375, "step": 1454 }, { "clip_ratio/high_max": 0.0037044620257802308, "clip_ratio/high_mean": 0.0013623190388898365, "clip_ratio/low_mean": 0.0010984848049702123, "clip_ratio/low_min": 8.363441338588018e-06, "clip_ratio/region_mean": 0.0024608039093436673, "epoch": 0.13579966050084874, "grad_norm": 112560.53125, "learning_rate": 1e-06, "loss": 69654.8594, "step": 1455 }, { "clip_ratio/high_max": 0.0029073726691422053, "clip_ratio/high_mean": 0.0011535467201611027, "clip_ratio/low_mean": 0.0012407429385348223, "clip_ratio/low_min": 0.000180566531525983, "clip_ratio/region_mean": 0.002394289636868052, "epoch": 0.13589299360084933, "grad_norm": 163265.15625, "learning_rate": 1e-06, "loss": 1490.7811, "step": 1456 }, { "clip_ratio/high_max": 0.0030246080714277923, "clip_ratio/high_mean": 0.001346336655842606, "clip_ratio/low_mean": 0.0012095758429495618, "clip_ratio/low_min": 2.9620852728839964e-05, "clip_ratio/region_mean": 0.0025559125206200406, "epoch": 0.13598632670084992, "grad_norm": 3139.61376953125, "learning_rate": 1e-06, "loss": 31.5692, "step": 1457 }, { "clip_ratio/high_max": 0.003034998670045752, "clip_ratio/high_mean": 0.0012340804132691119, "clip_ratio/low_mean": 0.0012775945724570192, "clip_ratio/low_min": 9.924536789185368e-05, "clip_ratio/region_mean": 0.0025116749602602795, "epoch": 0.1360796598008505, "grad_norm": 22173950.0, "learning_rate": 1e-06, "loss": 42564.3398, "step": 1458 }, { "clip_ratio/high_max": 0.0027653024590108544, "clip_ratio/high_mean": 0.0010914822978520533, "clip_ratio/low_mean": 0.001364976640616078, "clip_ratio/low_min": 5.9280187997501343e-05, "clip_ratio/region_mean": 0.002456458969390951, "epoch": 0.13617299290085108, "grad_norm": 9155649.0, "learning_rate": 1e-06, "loss": 1366.3774, "step": 1459 }, { "clip_ratio/high_max": 0.0026723660485004075, "clip_ratio/high_mean": 0.0010863188090297626, "clip_ratio/low_mean": 0.0013358630749280564, "clip_ratio/low_min": 7.267360342666507e-05, "clip_ratio/region_mean": 0.0024221818457590416, "epoch": 0.13626632600085167, "grad_norm": 16312503.0, "learning_rate": 1e-06, "loss": 61015.5625, "step": 1460 }, { "clip_ratio/high_max": 0.003069936268730089, "clip_ratio/high_mean": 0.0011654438494588248, "clip_ratio/low_mean": 0.0012870612281403737, "clip_ratio/low_min": 6.497378308267798e-05, "clip_ratio/region_mean": 0.002452505061228294, "epoch": 0.13635965910085224, "grad_norm": 3661.021484375, "learning_rate": 1e-06, "loss": 1.5682, "step": 1461 }, { "clip_ratio/high_max": 0.002802778282784857, "clip_ratio/high_mean": 0.0011618156459007878, "clip_ratio/low_mean": 0.0016058762121247128, "clip_ratio/low_min": 8.116667777358089e-05, "clip_ratio/region_mean": 0.002767691810731776, "epoch": 0.13645299220085283, "grad_norm": 0.17752307653427124, "learning_rate": 1e-06, "loss": 0.0489, "step": 1462 }, { "clip_ratio/high_max": 0.0033654941435088404, "clip_ratio/high_mean": 0.0013382411416387185, "clip_ratio/low_mean": 0.001714239999273559, "clip_ratio/low_min": 0.00022977018943493022, "clip_ratio/region_mean": 0.003052481151826214, "epoch": 0.13654632530085342, "grad_norm": 132407704.0, "learning_rate": 1e-06, "loss": 19732.2305, "step": 1463 }, { "clip_ratio/high_max": 0.0028428116638679057, "clip_ratio/high_mean": 0.0010520606592763215, "clip_ratio/low_mean": 0.0013326241896720603, "clip_ratio/low_min": 9.816868623602204e-05, "clip_ratio/region_mean": 0.002384684943535831, "epoch": 0.13663965840085399, "grad_norm": 6.5344109535217285, "learning_rate": 1e-06, "loss": 0.0431, "step": 1464 }, { "clip_ratio/high_max": 0.003020563948666677, "clip_ratio/high_mean": 0.0011408307655074168, "clip_ratio/low_mean": 0.001660366848227568, "clip_ratio/low_min": 0.00013768580720352475, "clip_ratio/region_mean": 0.002801197631924879, "epoch": 0.13673299150085458, "grad_norm": 87.53910064697266, "learning_rate": 1e-06, "loss": 0.1598, "step": 1465 }, { "clip_ratio/high_max": 0.0027107544883619994, "clip_ratio/high_mean": 0.0010623608413879992, "clip_ratio/low_mean": 0.0014829712235950865, "clip_ratio/low_min": 0.00011926272145501571, "clip_ratio/region_mean": 0.00254533214319963, "epoch": 0.13682632460085517, "grad_norm": 176.5533905029297, "learning_rate": 1e-06, "loss": 0.1242, "step": 1466 }, { "clip_ratio/high_max": 0.0026013092865468934, "clip_ratio/high_mean": 0.0011091056949226186, "clip_ratio/low_mean": 0.0014492754016828258, "clip_ratio/low_min": 0.00018529252338339575, "clip_ratio/region_mean": 0.00255838104931172, "epoch": 0.13691965770085573, "grad_norm": 21.03973388671875, "learning_rate": 1e-06, "loss": 0.5121, "step": 1467 }, { "clip_ratio/high_max": 0.0030666312304674648, "clip_ratio/high_mean": 0.0011620019522524672, "clip_ratio/low_mean": 0.0017692050387267955, "clip_ratio/low_min": 9.229573697666638e-05, "clip_ratio/region_mean": 0.0029312069818843156, "epoch": 0.13701299080085633, "grad_norm": 0.4532798230648041, "learning_rate": 1e-06, "loss": 0.0502, "step": 1468 }, { "clip_ratio/high_max": 0.0023283137707039714, "clip_ratio/high_mean": 0.0009691105151432566, "clip_ratio/low_mean": 0.0016064558149082586, "clip_ratio/low_min": 1.5052986782393418e-05, "clip_ratio/region_mean": 0.0025755663809832186, "epoch": 0.13710632390085692, "grad_norm": 77.69818115234375, "learning_rate": 1e-06, "loss": 5.7352, "step": 1469 }, { "clip_ratio/high_max": 0.003395376101252623, "clip_ratio/high_mean": 0.0013902069622417912, "clip_ratio/low_mean": 0.0014539737949235132, "clip_ratio/low_min": 8.237928341259249e-05, "clip_ratio/region_mean": 0.002844180751708336, "epoch": 0.1371996570008575, "grad_norm": 299.71783447265625, "learning_rate": 1e-06, "loss": 0.0082, "step": 1470 }, { "clip_ratio/high_max": 0.002120830729836598, "clip_ratio/high_mean": 0.0009156599826383172, "clip_ratio/low_mean": 0.00175433985714335, "clip_ratio/low_min": 0.00030856522789690644, "clip_ratio/region_mean": 0.002669999870704487, "epoch": 0.13729299010085808, "grad_norm": 2576.32421875, "learning_rate": 1e-06, "loss": 0.688, "step": 1471 }, { "clip_ratio/high_max": 0.0025532959698466584, "clip_ratio/high_mean": 0.0010217638737231027, "clip_ratio/low_mean": 0.0018515736519475468, "clip_ratio/low_min": 0.0002334883301955415, "clip_ratio/region_mean": 0.0028733375074807554, "epoch": 0.13738632320085867, "grad_norm": 1146906.625, "learning_rate": 1e-06, "loss": 126.0968, "step": 1472 }, { "clip_ratio/high_max": 0.0023415247414959595, "clip_ratio/high_mean": 0.0008965679935499793, "clip_ratio/low_mean": 0.0016249498949036933, "clip_ratio/low_min": 0.00011522577642608667, "clip_ratio/region_mean": 0.0025215178829967044, "epoch": 0.13747965630085926, "grad_norm": 56.68210983276367, "learning_rate": 1e-06, "loss": 0.1344, "step": 1473 }, { "clip_ratio/high_max": 0.0026426260446896777, "clip_ratio/high_mean": 0.0011075572583649773, "clip_ratio/low_mean": 0.001867687824415043, "clip_ratio/low_min": 0.0001511025329818949, "clip_ratio/region_mean": 0.0029752450936939567, "epoch": 0.13757298940085982, "grad_norm": 45.35641860961914, "learning_rate": 1e-06, "loss": 0.0716, "step": 1474 }, { "clip_ratio/high_max": 0.0026933189656119794, "clip_ratio/high_mean": 0.0011102631578978617, "clip_ratio/low_mean": 0.0017461525458202232, "clip_ratio/low_min": 0.00020138562740612542, "clip_ratio/region_mean": 0.0028564157328219153, "epoch": 0.13766632250086042, "grad_norm": 75.17736053466797, "learning_rate": 1e-06, "loss": 204.6845, "step": 1475 }, { "clip_ratio/high_max": 0.0025441070101805963, "clip_ratio/high_mean": 0.0011320877383695915, "clip_ratio/low_mean": 0.0020320771291153505, "clip_ratio/low_min": 0.0002113869677486946, "clip_ratio/region_mean": 0.0031641648165532388, "epoch": 0.137759655600861, "grad_norm": 0.15559247136116028, "learning_rate": 1e-06, "loss": 0.0551, "step": 1476 }, { "clip_ratio/high_max": 0.002552034806285519, "clip_ratio/high_mean": 0.0010478249614607194, "clip_ratio/low_mean": 0.0015189962578006089, "clip_ratio/low_min": 5.760727253800724e-05, "clip_ratio/region_mean": 0.002566821247455664, "epoch": 0.13785298870086157, "grad_norm": 48.580528259277344, "learning_rate": 1e-06, "loss": 0.3133, "step": 1477 }, { "clip_ratio/high_max": 0.003021911354153417, "clip_ratio/high_mean": 0.0012097829840058694, "clip_ratio/low_mean": 0.0018334678279643413, "clip_ratio/low_min": 5.069810322311241e-05, "clip_ratio/region_mean": 0.003043250799237285, "epoch": 0.13794632180086216, "grad_norm": 1.014416217803955, "learning_rate": 1e-06, "loss": -0.0159, "step": 1478 }, { "clip_ratio/high_max": 0.0027577435175771825, "clip_ratio/high_mean": 0.00114695446609403, "clip_ratio/low_mean": 0.002126230210706126, "clip_ratio/low_min": 0.0003649435839179205, "clip_ratio/region_mean": 0.003273184600402601, "epoch": 0.13803965490086276, "grad_norm": 0.6722376346588135, "learning_rate": 1e-06, "loss": 0.0475, "step": 1479 }, { "clip_ratio/high_max": 0.0025276836386183277, "clip_ratio/high_mean": 0.0010270712446072139, "clip_ratio/low_mean": 0.0018856107708415948, "clip_ratio/low_min": 0.00016682858768035658, "clip_ratio/region_mean": 0.002912681971793063, "epoch": 0.13813298800086332, "grad_norm": 138.02804565429688, "learning_rate": 1e-06, "loss": 0.0701, "step": 1480 }, { "clip_ratio/high_max": 0.0025165362094412558, "clip_ratio/high_mean": 0.0010436761367600411, "clip_ratio/low_mean": 0.0019109858185402118, "clip_ratio/low_min": 0.0002596727417767397, "clip_ratio/region_mean": 0.002954661918920465, "epoch": 0.1382263211008639, "grad_norm": 41.58405685424805, "learning_rate": 1e-06, "loss": 0.0258, "step": 1481 }, { "clip_ratio/high_max": 0.0025035100552486256, "clip_ratio/high_mean": 0.00100844184817106, "clip_ratio/low_mean": 0.0016781365739007015, "clip_ratio/low_min": 0.00018274706417287234, "clip_ratio/region_mean": 0.002686578423890751, "epoch": 0.1383196542008645, "grad_norm": 94748942336.0, "learning_rate": 1e-06, "loss": 16039725.0, "step": 1482 }, { "clip_ratio/high_max": 0.0025282146452809684, "clip_ratio/high_mean": 0.001100837082049111, "clip_ratio/low_mean": 0.001622892279556254, "clip_ratio/low_min": 0.00010250700415781466, "clip_ratio/region_mean": 0.002723729397985153, "epoch": 0.13841298730086507, "grad_norm": 47.18560791015625, "learning_rate": 1e-06, "loss": -0.0018, "step": 1483 }, { "clip_ratio/high_max": 0.0025120902428170666, "clip_ratio/high_mean": 0.0011071353046645527, "clip_ratio/low_mean": 0.001842096826294437, "clip_ratio/low_min": 0.0001426637318218127, "clip_ratio/region_mean": 0.0029492320827557705, "epoch": 0.13850632040086566, "grad_norm": 36.2614631652832, "learning_rate": 1e-06, "loss": 0.008, "step": 1484 }, { "clip_ratio/high_max": 0.0028797902341466397, "clip_ratio/high_mean": 0.0012152022718510125, "clip_ratio/low_mean": 0.0018695794133236632, "clip_ratio/low_min": 0.00014046985052118544, "clip_ratio/region_mean": 0.003084781754296273, "epoch": 0.13859965350086625, "grad_norm": 0.2104385793209076, "learning_rate": 1e-06, "loss": -0.0032, "step": 1485 }, { "clip_ratio/high_max": 0.003036441503354581, "clip_ratio/high_mean": 0.001093587703508092, "clip_ratio/low_mean": 0.0018492365852580406, "clip_ratio/low_min": 7.315428683796199e-05, "clip_ratio/region_mean": 0.0029428243069560267, "epoch": 0.13869298660086682, "grad_norm": 2864.622314453125, "learning_rate": 1e-06, "loss": 0.6687, "step": 1486 }, { "clip_ratio/high_max": 0.0025476403898210265, "clip_ratio/high_mean": 0.0010516468064452056, "clip_ratio/low_mean": 0.0019159467410645448, "clip_ratio/low_min": 0.0001725349520711461, "clip_ratio/region_mean": 0.0029675935802515596, "epoch": 0.1387863197008674, "grad_norm": 0.9033017158508301, "learning_rate": 1e-06, "loss": 0.0295, "step": 1487 }, { "clip_ratio/high_max": 0.0027203057179576717, "clip_ratio/high_mean": 0.0010762014280771837, "clip_ratio/low_mean": 0.0019635607895907015, "clip_ratio/low_min": 0.00012617603897524532, "clip_ratio/region_mean": 0.0030397622322198004, "epoch": 0.138879652800868, "grad_norm": 0.12829628586769104, "learning_rate": 1e-06, "loss": 0.036, "step": 1488 }, { "clip_ratio/high_max": 0.002672911221452523, "clip_ratio/high_mean": 0.0010790321848617168, "clip_ratio/low_mean": 0.0020393501108628698, "clip_ratio/low_min": 0.000298680504783988, "clip_ratio/region_mean": 0.0031183823448373005, "epoch": 0.13897298590086857, "grad_norm": 82108.6640625, "learning_rate": 1e-06, "loss": 98470.9062, "step": 1489 }, { "clip_ratio/high_max": 0.00289368380617816, "clip_ratio/high_mean": 0.00111874097638065, "clip_ratio/low_mean": 0.0020657636923715472, "clip_ratio/low_min": 0.00021526488671952393, "clip_ratio/region_mean": 0.0031845046614762396, "epoch": 0.13906631900086916, "grad_norm": 11798.328125, "learning_rate": 1e-06, "loss": 3.9193, "step": 1490 }, { "clip_ratio/high_max": 0.002735727437539026, "clip_ratio/high_mean": 0.0010649611458575237, "clip_ratio/low_mean": 0.0020351482853584457, "clip_ratio/low_min": 0.00011033204100385774, "clip_ratio/region_mean": 0.003100109417573549, "epoch": 0.13915965210086975, "grad_norm": 0.2727256715297699, "learning_rate": 1e-06, "loss": 0.0455, "step": 1491 }, { "clip_ratio/high_max": 0.0028316142052062787, "clip_ratio/high_mean": 0.0010857903507712763, "clip_ratio/low_mean": 0.0018259168064105324, "clip_ratio/low_min": 8.079096005531028e-05, "clip_ratio/region_mean": 0.0029117071826476604, "epoch": 0.13925298520087034, "grad_norm": 361965.40625, "learning_rate": 1e-06, "loss": 41.9279, "step": 1492 }, { "clip_ratio/high_max": 0.0035483056853991, "clip_ratio/high_mean": 0.0012848416808992624, "clip_ratio/low_mean": 0.0017576928712514928, "clip_ratio/low_min": 9.259940816264134e-05, "clip_ratio/region_mean": 0.0030425345394178294, "epoch": 0.1393463183008709, "grad_norm": 1.8188329935073853, "learning_rate": 1e-06, "loss": -0.0186, "step": 1493 }, { "clip_ratio/high_max": 0.002997007148223929, "clip_ratio/high_mean": 0.0011136674347653752, "clip_ratio/low_mean": 0.0021935204713372514, "clip_ratio/low_min": 0.0002424653039270197, "clip_ratio/region_mean": 0.003307187871541828, "epoch": 0.1394396514008715, "grad_norm": 1499.675537109375, "learning_rate": 1e-06, "loss": 0.8552, "step": 1494 }, { "clip_ratio/high_max": 0.0028543345688376576, "clip_ratio/high_mean": 0.0011214733185624937, "clip_ratio/low_mean": 0.0018787241206155159, "clip_ratio/low_min": 5.1910259571741335e-05, "clip_ratio/region_mean": 0.003000197422807105, "epoch": 0.1395329845008721, "grad_norm": 0.1897302269935608, "learning_rate": 1e-06, "loss": 0.0225, "step": 1495 }, { "clip_ratio/high_max": 0.002491327468305826, "clip_ratio/high_mean": 0.0010863590923690936, "clip_ratio/low_mean": 0.0018487736961105838, "clip_ratio/low_min": 0.00012748785957228392, "clip_ratio/region_mean": 0.0029351327975746244, "epoch": 0.13962631760087266, "grad_norm": 0.5874179601669312, "learning_rate": 1e-06, "loss": 0.0421, "step": 1496 }, { "clip_ratio/high_max": 0.002512344333808869, "clip_ratio/high_mean": 0.001070617987352307, "clip_ratio/low_mean": 0.0016460278820886742, "clip_ratio/low_min": 0.00010348728847020539, "clip_ratio/region_mean": 0.0027166458748979494, "epoch": 0.13971965070087325, "grad_norm": 0.13642385601997375, "learning_rate": 1e-06, "loss": 0.0295, "step": 1497 }, { "clip_ratio/high_max": 0.002776670349703636, "clip_ratio/high_mean": 0.0010997803365171421, "clip_ratio/low_mean": 0.001967505093489308, "clip_ratio/low_min": 0.00012074071128154173, "clip_ratio/region_mean": 0.0030672854190925136, "epoch": 0.13981298380087384, "grad_norm": 17646.11328125, "learning_rate": 1e-06, "loss": 1.2225, "step": 1498 }, { "clip_ratio/high_max": 0.002684170969587285, "clip_ratio/high_mean": 0.0010857417455554241, "clip_ratio/low_mean": 0.002158684321329929, "clip_ratio/low_min": 0.000279772822977975, "clip_ratio/region_mean": 0.003244426115998067, "epoch": 0.1399063169008744, "grad_norm": 32.705810546875, "learning_rate": 1e-06, "loss": 0.3635, "step": 1499 }, { "clip_ratio/high_max": 0.002476988476701081, "clip_ratio/high_mean": 0.0011053278230974684, "clip_ratio/low_mean": 0.0018188966823800001, "clip_ratio/low_min": 0.00012389728362904862, "clip_ratio/region_mean": 0.00292422465281561, "epoch": 0.139999650000875, "grad_norm": 634.0368041992188, "learning_rate": 1e-06, "loss": 0.3528, "step": 1500 }, { "clip_ratio/high_max": 0.002559054169978481, "clip_ratio/high_mean": 0.000955936819082126, "clip_ratio/low_mean": 0.0019511119462549686, "clip_ratio/low_min": 0.0002279407526657451, "clip_ratio/region_mean": 0.0029070487144053914, "epoch": 0.1400929831008756, "grad_norm": 1.091138482093811, "learning_rate": 1e-06, "loss": 0.0722, "step": 1501 }, { "clip_ratio/high_max": 0.002729569867369719, "clip_ratio/high_mean": 0.0010826243797055213, "clip_ratio/low_mean": 0.0019234130013501272, "clip_ratio/low_min": 0.00015380119293695316, "clip_ratio/region_mean": 0.0030060373683227226, "epoch": 0.14018631620087615, "grad_norm": 118.66854858398438, "learning_rate": 1e-06, "loss": 0.0953, "step": 1502 }, { "clip_ratio/high_max": 0.002382494873018004, "clip_ratio/high_mean": 0.0010244989389320835, "clip_ratio/low_mean": 0.002014790392422583, "clip_ratio/low_min": 0.00014803543854213785, "clip_ratio/region_mean": 0.0030392894113902003, "epoch": 0.14027964930087675, "grad_norm": 622016.375, "learning_rate": 1e-06, "loss": 2683385.0, "step": 1503 }, { "clip_ratio/high_max": 0.0025807753554545343, "clip_ratio/high_mean": 0.0011015739110007416, "clip_ratio/low_mean": 0.0019533998492988758, "clip_ratio/low_min": 0.00017696638678899035, "clip_ratio/region_mean": 0.0030549737784895115, "epoch": 0.14037298240087734, "grad_norm": 119163952.0, "learning_rate": 1e-06, "loss": 18249.5703, "step": 1504 }, { "clip_ratio/high_max": 0.002656534365087282, "clip_ratio/high_mean": 0.00112109645124292, "clip_ratio/low_mean": 0.0019305386922496837, "clip_ratio/low_min": 0.00020837357715208782, "clip_ratio/region_mean": 0.003051635176234413, "epoch": 0.1404663155008779, "grad_norm": 0.18996372818946838, "learning_rate": 1e-06, "loss": 0.033, "step": 1505 }, { "clip_ratio/high_max": 0.002911505704105366, "clip_ratio/high_mean": 0.0011872584436787292, "clip_ratio/low_mean": 0.0018686972071009222, "clip_ratio/low_min": 8.32857949717436e-05, "clip_ratio/region_mean": 0.003055955676245503, "epoch": 0.1405596486008785, "grad_norm": 0.2725635766983032, "learning_rate": 1e-06, "loss": 0.0157, "step": 1506 }, { "clip_ratio/high_max": 0.002351409784751013, "clip_ratio/high_mean": 0.0009633962326915935, "clip_ratio/low_mean": 0.0015226272043946665, "clip_ratio/low_min": 0.0001658974142628722, "clip_ratio/region_mean": 0.002486023433448281, "epoch": 0.1406529817008791, "grad_norm": 385662.90625, "learning_rate": 1e-06, "loss": 16.4364, "step": 1507 }, { "clip_ratio/high_max": 0.0024626222366350703, "clip_ratio/high_mean": 0.0009602317968528951, "clip_ratio/low_mean": 0.002021338754275348, "clip_ratio/low_min": 0.00019941251503041713, "clip_ratio/region_mean": 0.002981570578413084, "epoch": 0.14074631480087965, "grad_norm": 127.84528350830078, "learning_rate": 1e-06, "loss": 1.1158, "step": 1508 }, { "clip_ratio/high_max": 0.0022737359977327287, "clip_ratio/high_mean": 0.0009457884279981954, "clip_ratio/low_mean": 0.0019727403596334625, "clip_ratio/low_min": 0.00023420342313329456, "clip_ratio/region_mean": 0.0029185287639847957, "epoch": 0.14083964790088024, "grad_norm": 190.7408905029297, "learning_rate": 1e-06, "loss": 0.1127, "step": 1509 }, { "clip_ratio/high_max": 0.00239675917691784, "clip_ratio/high_mean": 0.0010256225468765479, "clip_ratio/low_mean": 0.001787137702194741, "clip_ratio/low_min": 0.00027842019881063607, "clip_ratio/region_mean": 0.002812760285451077, "epoch": 0.14093298100088084, "grad_norm": 60.61906051635742, "learning_rate": 1e-06, "loss": 0.098, "step": 1510 }, { "clip_ratio/high_max": 0.0025427394939470105, "clip_ratio/high_mean": 0.001114632726967102, "clip_ratio/low_mean": 0.0020051450774190016, "clip_ratio/low_min": 0.00018451150208420586, "clip_ratio/region_mean": 0.003119777829851955, "epoch": 0.14102631410088143, "grad_norm": 3.8603458404541016, "learning_rate": 1e-06, "loss": 0.0114, "step": 1511 }, { "clip_ratio/high_max": 0.0025595306578907184, "clip_ratio/high_mean": 0.0011419912334531546, "clip_ratio/low_mean": 0.0016593784603173845, "clip_ratio/low_min": 6.422706610464957e-05, "clip_ratio/region_mean": 0.0028013697592541575, "epoch": 0.141119647200882, "grad_norm": 15.372194290161133, "learning_rate": 1e-06, "loss": 0.011, "step": 1512 }, { "clip_ratio/high_max": 0.0028597326490853447, "clip_ratio/high_mean": 0.0010098287148139207, "clip_ratio/low_mean": 0.0016525302489753813, "clip_ratio/low_min": 0.00010009286415879615, "clip_ratio/region_mean": 0.0026623589801602066, "epoch": 0.14121298030088258, "grad_norm": 0.18187662959098816, "learning_rate": 1e-06, "loss": 0.034, "step": 1513 }, { "clip_ratio/high_max": 0.002360991478781216, "clip_ratio/high_mean": 0.0009326376821263693, "clip_ratio/low_mean": 0.0017642461025388911, "clip_ratio/low_min": 0.00016139954641403165, "clip_ratio/region_mean": 0.00269688375556143, "epoch": 0.14130631340088318, "grad_norm": 5957963.0, "learning_rate": 1e-06, "loss": 6755.7212, "step": 1514 }, { "clip_ratio/high_max": 0.002386312113230815, "clip_ratio/high_mean": 0.0010744222236098722, "clip_ratio/low_mean": 0.001646172608161578, "clip_ratio/low_min": 0.00011660064774332568, "clip_ratio/region_mean": 0.002720594875427196, "epoch": 0.14139964650088374, "grad_norm": 22499.126953125, "learning_rate": 1e-06, "loss": 5727.5049, "step": 1515 }, { "clip_ratio/high_max": 0.002802005074045155, "clip_ratio/high_mean": 0.001139350097218994, "clip_ratio/low_mean": 0.0015593713969792589, "clip_ratio/low_min": 5.4314161388901994e-05, "clip_ratio/region_mean": 0.002698721524211578, "epoch": 0.14149297960088433, "grad_norm": 0.6947351098060608, "learning_rate": 1e-06, "loss": -0.0062, "step": 1516 }, { "clip_ratio/high_max": 0.0026291890317224897, "clip_ratio/high_mean": 0.0010123235115315765, "clip_ratio/low_mean": 0.00137959133280674, "clip_ratio/low_min": 6.238477726583369e-05, "clip_ratio/region_mean": 0.002391914909821935, "epoch": 0.14158631270088493, "grad_norm": 0.360291987657547, "learning_rate": 1e-06, "loss": 0.0027, "step": 1517 }, { "clip_ratio/high_max": 0.003240668673242908, "clip_ratio/high_mean": 0.001235901945619844, "clip_ratio/low_mean": 0.001896585825306829, "clip_ratio/low_min": 0.0001170319264929276, "clip_ratio/region_mean": 0.0031324878218583763, "epoch": 0.1416796458008855, "grad_norm": 0.44307708740234375, "learning_rate": 1e-06, "loss": 0.0287, "step": 1518 }, { "clip_ratio/high_max": 0.002857858209608821, "clip_ratio/high_mean": 0.0011926780844078166, "clip_ratio/low_mean": 0.0016738115955376998, "clip_ratio/low_min": 0.00017300990748481127, "clip_ratio/region_mean": 0.002866489565349184, "epoch": 0.14177297890088608, "grad_norm": 2.150840997695923, "learning_rate": 1e-06, "loss": -0.0212, "step": 1519 }, { "clip_ratio/high_max": 0.0025947736285161227, "clip_ratio/high_mean": 0.0009376536290801596, "clip_ratio/low_mean": 0.001650780024647247, "clip_ratio/low_min": 0.00016852348380780313, "clip_ratio/region_mean": 0.0025884337082970887, "epoch": 0.14186631200088667, "grad_norm": 0.7292062640190125, "learning_rate": 1e-06, "loss": 0.0414, "step": 1520 }, { "clip_ratio/high_max": 0.003037308946659323, "clip_ratio/high_mean": 0.0012134050302847754, "clip_ratio/low_mean": 0.0016742802454245975, "clip_ratio/low_min": 6.721875979565084e-05, "clip_ratio/region_mean": 0.0028876853175461292, "epoch": 0.14195964510088724, "grad_norm": 35.17675018310547, "learning_rate": 1e-06, "loss": -0.0114, "step": 1521 }, { "clip_ratio/high_max": 0.0026719561356003396, "clip_ratio/high_mean": 0.00107242417652742, "clip_ratio/low_mean": 0.0015977268740243744, "clip_ratio/low_min": 5.0152462790720165e-05, "clip_ratio/region_mean": 0.002670151057827752, "epoch": 0.14205297820088783, "grad_norm": 202.90602111816406, "learning_rate": 1e-06, "loss": 1689.6633, "step": 1522 }, { "clip_ratio/high_max": 0.0027935032994719222, "clip_ratio/high_mean": 0.0012418804108165205, "clip_ratio/low_mean": 0.0015954648952174466, "clip_ratio/low_min": 0.00016618280642433092, "clip_ratio/region_mean": 0.002837345273292158, "epoch": 0.14214631130088842, "grad_norm": 0.17362655699253082, "learning_rate": 1e-06, "loss": -0.007, "step": 1523 }, { "clip_ratio/high_max": 0.002675141477084253, "clip_ratio/high_mean": 0.0011253019565629074, "clip_ratio/low_mean": 0.0018174434335378464, "clip_ratio/low_min": 0.00012321599206188694, "clip_ratio/region_mean": 0.0029427453846437857, "epoch": 0.142239644400889, "grad_norm": 1808.189208984375, "learning_rate": 1e-06, "loss": 0.1595, "step": 1524 }, { "clip_ratio/high_max": 0.002512169914552942, "clip_ratio/high_mean": 0.001044392971380148, "clip_ratio/low_mean": 0.0019251096091466025, "clip_ratio/low_min": 0.00017428356295567937, "clip_ratio/region_mean": 0.0029695025586988777, "epoch": 0.14233297750088958, "grad_norm": 10983.962890625, "learning_rate": 1e-06, "loss": 17.4482, "step": 1525 }, { "clip_ratio/high_max": 0.0029271678649820387, "clip_ratio/high_mean": 0.0011840961597044952, "clip_ratio/low_mean": 0.0016681844608683605, "clip_ratio/low_min": 8.891924426279729e-05, "clip_ratio/region_mean": 0.00285228066786658, "epoch": 0.14242631060089017, "grad_norm": 0.16691197454929352, "learning_rate": 1e-06, "loss": 0.1573, "step": 1526 }, { "clip_ratio/high_max": 0.002384984720265493, "clip_ratio/high_mean": 0.0010091786716657225, "clip_ratio/low_mean": 0.00175820903677959, "clip_ratio/low_min": 0.00011475205610622652, "clip_ratio/region_mean": 0.002767387675703503, "epoch": 0.14251964370089074, "grad_norm": 0.3756590187549591, "learning_rate": 1e-06, "loss": 0.0163, "step": 1527 }, { "clip_ratio/high_max": 0.002629722010169644, "clip_ratio/high_mean": 0.00115497025399236, "clip_ratio/low_mean": 0.001925291548104724, "clip_ratio/low_min": 0.00017708532504912, "clip_ratio/region_mean": 0.0030802617911831476, "epoch": 0.14261297680089133, "grad_norm": 20940692.0, "learning_rate": 1e-06, "loss": 3077.0569, "step": 1528 }, { "clip_ratio/high_max": 0.002441800148517359, "clip_ratio/high_mean": 0.0010241254713037051, "clip_ratio/low_mean": 0.002081389229715569, "clip_ratio/low_min": 8.49371608637739e-05, "clip_ratio/region_mean": 0.0031055146901053376, "epoch": 0.14270630990089192, "grad_norm": 1656.2562255859375, "learning_rate": 1e-06, "loss": 0.2107, "step": 1529 }, { "clip_ratio/high_max": 0.002847775351256132, "clip_ratio/high_mean": 0.0011108239523309749, "clip_ratio/low_mean": 0.0019398850126890466, "clip_ratio/low_min": 0.00010571417442406528, "clip_ratio/region_mean": 0.0030507089395541698, "epoch": 0.14279964300089248, "grad_norm": 0.16886836290359497, "learning_rate": 1e-06, "loss": 0.013, "step": 1530 }, { "clip_ratio/high_max": 0.0023026893504720647, "clip_ratio/high_mean": 0.0009090209950954886, "clip_ratio/low_mean": 0.002193897104007192, "clip_ratio/low_min": 0.00018469767746864818, "clip_ratio/region_mean": 0.0031029181118356064, "epoch": 0.14289297610089308, "grad_norm": 3523.4658203125, "learning_rate": 1e-06, "loss": 3.8969, "step": 1531 }, { "clip_ratio/high_max": 0.002778836900688475, "clip_ratio/high_mean": 0.0011307587246847106, "clip_ratio/low_mean": 0.0020236671189195476, "clip_ratio/low_min": 0.00013172544458939228, "clip_ratio/region_mean": 0.0031544257581117563, "epoch": 0.14298630920089367, "grad_norm": 1.3277515172958374, "learning_rate": 1e-06, "loss": 0.0124, "step": 1532 }, { "clip_ratio/high_max": 0.002336524943530094, "clip_ratio/high_mean": 0.0009155230145552196, "clip_ratio/low_mean": 0.0018058124478557147, "clip_ratio/low_min": 0.00012848975165979937, "clip_ratio/region_mean": 0.002721335527894553, "epoch": 0.14307964230089426, "grad_norm": 9.080251693725586, "learning_rate": 1e-06, "loss": 0.0334, "step": 1533 }, { "clip_ratio/high_max": 0.0027948216084041633, "clip_ratio/high_mean": 0.001224981027917238, "clip_ratio/low_mean": 0.001545853381685447, "clip_ratio/low_min": 0.00011692508815031033, "clip_ratio/region_mean": 0.0027708344205166213, "epoch": 0.14317297540089483, "grad_norm": 95.20043182373047, "learning_rate": 1e-06, "loss": 0.016, "step": 1534 }, { "clip_ratio/high_max": 0.0028045622020727023, "clip_ratio/high_mean": 0.0011645861159195192, "clip_ratio/low_mean": 0.0016224990540649742, "clip_ratio/low_min": 3.11798467009794e-05, "clip_ratio/region_mean": 0.0027870852063642815, "epoch": 0.14326630850089542, "grad_norm": 4437693.5, "learning_rate": 1e-06, "loss": 1205.972, "step": 1535 }, { "clip_ratio/high_max": 0.0028038497621309943, "clip_ratio/high_mean": 0.001094444582122378, "clip_ratio/low_mean": 0.00190238327922998, "clip_ratio/low_min": 0.00015456132859981153, "clip_ratio/region_mean": 0.002996827766764909, "epoch": 0.143359641600896, "grad_norm": 1.7393862009048462, "learning_rate": 1e-06, "loss": 0.027, "step": 1536 }, { "clip_ratio/high_max": 0.0029447367196553387, "clip_ratio/high_mean": 0.001133869565819623, "clip_ratio/low_mean": 0.0019141227639920544, "clip_ratio/low_min": 0.00013457245950121433, "clip_ratio/region_mean": 0.003047992278879974, "epoch": 0.14345297470089657, "grad_norm": 0.1973133385181427, "learning_rate": 1e-06, "loss": 0.0009, "step": 1537 }, { "clip_ratio/high_max": 0.0026746113144326955, "clip_ratio/high_mean": 0.0012373189092613757, "clip_ratio/low_mean": 0.0015054828218126204, "clip_ratio/low_min": 4.110761983611155e-05, "clip_ratio/region_mean": 0.0027428017565398477, "epoch": 0.14354630780089717, "grad_norm": 0.4735668897628784, "learning_rate": 1e-06, "loss": -0.0611, "step": 1538 }, { "clip_ratio/high_max": 0.0025380641891388223, "clip_ratio/high_mean": 0.0009869569439615589, "clip_ratio/low_mean": 0.002009772309975233, "clip_ratio/low_min": 0.00041927020265575266, "clip_ratio/region_mean": 0.0029967292939545587, "epoch": 0.14363964090089776, "grad_norm": 562.114501953125, "learning_rate": 1e-06, "loss": 0.2454, "step": 1539 }, { "clip_ratio/high_max": 0.002552912905230187, "clip_ratio/high_mean": 0.0009877963966573589, "clip_ratio/low_mean": 0.0017049977977876551, "clip_ratio/low_min": 0.00011808660019596573, "clip_ratio/region_mean": 0.0026927941435133107, "epoch": 0.14373297400089832, "grad_norm": 0.10326199233531952, "learning_rate": 1e-06, "loss": 0.0194, "step": 1540 }, { "clip_ratio/high_max": 0.0024103790856315754, "clip_ratio/high_mean": 0.0009840424736466957, "clip_ratio/low_mean": 0.0019094600938842632, "clip_ratio/low_min": 0.00011936086229979992, "clip_ratio/region_mean": 0.0028935025620739907, "epoch": 0.14382630710089891, "grad_norm": 6.811111927032471, "learning_rate": 1e-06, "loss": 0.0263, "step": 1541 }, { "clip_ratio/high_max": 0.0026147133539780043, "clip_ratio/high_mean": 0.0011004669686371926, "clip_ratio/low_mean": 0.0017559021871420555, "clip_ratio/low_min": 9.48631604842376e-05, "clip_ratio/region_mean": 0.0028563691157614812, "epoch": 0.1439196402008995, "grad_norm": 0.1275119185447693, "learning_rate": 1e-06, "loss": 0.0121, "step": 1542 }, { "clip_ratio/high_max": 0.00252138889482012, "clip_ratio/high_mean": 0.001115317621952272, "clip_ratio/low_mean": 0.001678161552263191, "clip_ratio/low_min": 0.00012585634158313042, "clip_ratio/region_mean": 0.002793479179672431, "epoch": 0.14401297330090007, "grad_norm": 0.13024497032165527, "learning_rate": 1e-06, "loss": 0.0143, "step": 1543 }, { "clip_ratio/high_max": 0.0024465847964165732, "clip_ratio/high_mean": 0.0011198112115380354, "clip_ratio/low_mean": 0.001520789510323084, "clip_ratio/low_min": 9.366517133457819e-05, "clip_ratio/region_mean": 0.0026406007673358545, "epoch": 0.14410630640090066, "grad_norm": 0.19698739051818848, "learning_rate": 1e-06, "loss": 0.0314, "step": 1544 }, { "clip_ratio/high_max": 0.0023658588906982914, "clip_ratio/high_mean": 0.00094227220324683, "clip_ratio/low_mean": 0.001641942286369158, "clip_ratio/low_min": 1.524762137705693e-05, "clip_ratio/region_mean": 0.0025842145332717337, "epoch": 0.14419963950090126, "grad_norm": 16.53726577758789, "learning_rate": 1e-06, "loss": 0.0208, "step": 1545 }, { "clip_ratio/high_max": 0.0026494650592212565, "clip_ratio/high_mean": 0.0009865426800388377, "clip_ratio/low_mean": 0.001996525468712207, "clip_ratio/low_min": 0.00016797171701909974, "clip_ratio/region_mean": 0.00298306810145732, "epoch": 0.14429297260090182, "grad_norm": 14.814027786254883, "learning_rate": 1e-06, "loss": 0.3019, "step": 1546 }, { "clip_ratio/high_max": 0.002630096932989545, "clip_ratio/high_mean": 0.0010100509134645108, "clip_ratio/low_mean": 0.001864409859990701, "clip_ratio/low_min": 0.00015251380318659358, "clip_ratio/region_mean": 0.0028744607770931907, "epoch": 0.1443863057009024, "grad_norm": 0.19725888967514038, "learning_rate": 1e-06, "loss": 0.0513, "step": 1547 }, { "clip_ratio/high_max": 0.0026789793046191335, "clip_ratio/high_mean": 0.001211010410770541, "clip_ratio/low_mean": 0.001779705849912716, "clip_ratio/low_min": 9.62578806138481e-05, "clip_ratio/region_mean": 0.002990716340718791, "epoch": 0.144479638800903, "grad_norm": 665696.0, "learning_rate": 1e-06, "loss": 102.9458, "step": 1548 }, { "clip_ratio/high_max": 0.002337312005693093, "clip_ratio/high_mean": 0.0010466457315487787, "clip_ratio/low_mean": 0.0017432257009204477, "clip_ratio/low_min": 0.0001165657713499968, "clip_ratio/region_mean": 0.002789871381537523, "epoch": 0.14457297190090357, "grad_norm": 727194.5625, "learning_rate": 1e-06, "loss": 162.4721, "step": 1549 }, { "clip_ratio/high_max": 0.0026424499665154144, "clip_ratio/high_mean": 0.0011280195467406884, "clip_ratio/low_mean": 0.002104257651808439, "clip_ratio/low_min": 0.00015156389599724207, "clip_ratio/region_mean": 0.0032322772894985974, "epoch": 0.14466630500090416, "grad_norm": 8.772529602050781, "learning_rate": 1e-06, "loss": 0.0055, "step": 1550 }, { "clip_ratio/high_max": 0.002590702279121615, "clip_ratio/high_mean": 0.00112200504008797, "clip_ratio/low_mean": 0.0020673071921919473, "clip_ratio/low_min": 9.938728908309713e-05, "clip_ratio/region_mean": 0.0031893123232293874, "epoch": 0.14475963810090475, "grad_norm": 8387987.5, "learning_rate": 1e-06, "loss": 3175947.25, "step": 1551 }, { "clip_ratio/high_max": 0.002854348967957776, "clip_ratio/high_mean": 0.001110080807848135, "clip_ratio/low_mean": 0.001879286934126867, "clip_ratio/low_min": 0.00013872201361664338, "clip_ratio/region_mean": 0.002989367741975002, "epoch": 0.14485297120090532, "grad_norm": 55558.0703125, "learning_rate": 1e-06, "loss": 1.366, "step": 1552 }, { "clip_ratio/high_max": 0.0023278400549315847, "clip_ratio/high_mean": 0.0009991454462578986, "clip_ratio/low_mean": 0.00191927971172845, "clip_ratio/low_min": 0.00010911193930951413, "clip_ratio/region_mean": 0.0029184251834522, "epoch": 0.1449463043009059, "grad_norm": 0.3620549440383911, "learning_rate": 1e-06, "loss": 0.0417, "step": 1553 }, { "clip_ratio/high_max": 0.0026657240669010207, "clip_ratio/high_mean": 0.0010828655922523467, "clip_ratio/low_mean": 0.001869636766059557, "clip_ratio/low_min": 0.00010762582860479597, "clip_ratio/region_mean": 0.0029525023492169566, "epoch": 0.1450396374009065, "grad_norm": 3.3240063190460205, "learning_rate": 1e-06, "loss": 0.0444, "step": 1554 }, { "clip_ratio/high_max": 0.0026116896988241933, "clip_ratio/high_mean": 0.0010168802600674098, "clip_ratio/low_mean": 0.0019212280385545455, "clip_ratio/low_min": 0.00014460312013397925, "clip_ratio/region_mean": 0.00293810831499286, "epoch": 0.1451329705009071, "grad_norm": 5.375405788421631, "learning_rate": 1e-06, "loss": 677.525, "step": 1555 }, { "clip_ratio/high_max": 0.002663207778823562, "clip_ratio/high_mean": 0.0011043696831620764, "clip_ratio/low_mean": 0.0018181700033892412, "clip_ratio/low_min": 0.00010887289499805775, "clip_ratio/region_mean": 0.002922539701103233, "epoch": 0.14522630360090766, "grad_norm": 0.30215802788734436, "learning_rate": 1e-06, "loss": 0.0102, "step": 1556 }, { "clip_ratio/high_max": 0.0022382863789971452, "clip_ratio/high_mean": 0.0009825547003856627, "clip_ratio/low_mean": 0.0019114146889478434, "clip_ratio/low_min": 4.953003008267842e-05, "clip_ratio/region_mean": 0.0028939694093423896, "epoch": 0.14531963670090825, "grad_norm": 1.254478931427002, "learning_rate": 1e-06, "loss": 0.0217, "step": 1557 }, { "clip_ratio/high_max": 0.002280154152686009, "clip_ratio/high_mean": 0.0009389863353135297, "clip_ratio/low_mean": 0.002273672856972553, "clip_ratio/low_min": 0.0002548917200329015, "clip_ratio/region_mean": 0.00321265913953539, "epoch": 0.14541296980090884, "grad_norm": 25.644412994384766, "learning_rate": 1e-06, "loss": 13.2576, "step": 1558 }, { "clip_ratio/high_max": 0.002329032482521143, "clip_ratio/high_mean": 0.0010551533741818275, "clip_ratio/low_mean": 0.001995294249354629, "clip_ratio/low_min": 0.00010950394334940938, "clip_ratio/region_mean": 0.0030504476235364564, "epoch": 0.1455063029009094, "grad_norm": 0.382550448179245, "learning_rate": 1e-06, "loss": 0.0568, "step": 1559 }, { "clip_ratio/high_max": 0.0023041013155307155, "clip_ratio/high_mean": 0.0009252987283616676, "clip_ratio/low_mean": 0.002036337220488349, "clip_ratio/low_min": 0.000311169456836069, "clip_ratio/region_mean": 0.00296163595339749, "epoch": 0.14559963600091, "grad_norm": 0.2616417407989502, "learning_rate": 1e-06, "loss": 0.0692, "step": 1560 }, { "clip_ratio/high_max": 0.0027700834252755158, "clip_ratio/high_mean": 0.0010676859947125195, "clip_ratio/low_mean": 0.001991885743336752, "clip_ratio/low_min": 0.0001697625375527423, "clip_ratio/region_mean": 0.0030595717835240066, "epoch": 0.1456929691009106, "grad_norm": 1.573317050933838, "learning_rate": 1e-06, "loss": 0.0444, "step": 1561 }, { "clip_ratio/high_max": 0.00326845495874295, "clip_ratio/high_mean": 0.001287224036786938, "clip_ratio/low_mean": 0.0016744497552281246, "clip_ratio/low_min": 0.00016795816918602213, "clip_ratio/region_mean": 0.0029616738174809143, "epoch": 0.14578630220091116, "grad_norm": 0.9270631074905396, "learning_rate": 1e-06, "loss": -0.0215, "step": 1562 }, { "clip_ratio/high_max": 0.0028114469969295897, "clip_ratio/high_mean": 0.0011550192175491247, "clip_ratio/low_mean": 0.0016833135814522393, "clip_ratio/low_min": 4.99443103763042e-05, "clip_ratio/region_mean": 0.0028383327298797667, "epoch": 0.14587963530091175, "grad_norm": 0.14496400952339172, "learning_rate": 1e-06, "loss": -0.0249, "step": 1563 }, { "clip_ratio/high_max": 0.0029258170325192623, "clip_ratio/high_mean": 0.0011192556594323833, "clip_ratio/low_mean": 0.002335351939109387, "clip_ratio/low_min": 0.00013288938498590142, "clip_ratio/region_mean": 0.003454607620369643, "epoch": 0.14597296840091234, "grad_norm": 1450.6258544921875, "learning_rate": 1e-06, "loss": 1.2704, "step": 1564 }, { "clip_ratio/high_max": 0.002235146333987359, "clip_ratio/high_mean": 0.0009744386115926318, "clip_ratio/low_mean": 0.0020400446992425714, "clip_ratio/low_min": 0.00023177365073934197, "clip_ratio/region_mean": 0.003014483314473182, "epoch": 0.1460663015009129, "grad_norm": 0.13401196897029877, "learning_rate": 1e-06, "loss": 0.0257, "step": 1565 }, { "clip_ratio/high_max": 0.0022301519311440643, "clip_ratio/high_mean": 0.0009865138054010458, "clip_ratio/low_mean": 0.0022935192537261173, "clip_ratio/low_min": 0.0002055229906545719, "clip_ratio/region_mean": 0.0032800331246107817, "epoch": 0.1461596346009135, "grad_norm": 9797.1435546875, "learning_rate": 1e-06, "loss": 1.0058, "step": 1566 }, { "clip_ratio/high_max": 0.0029357981038629077, "clip_ratio/high_mean": 0.0011617877571552526, "clip_ratio/low_mean": 0.0020853328351222444, "clip_ratio/low_min": 0.00023827277800592128, "clip_ratio/region_mean": 0.0032471206941409037, "epoch": 0.1462529677009141, "grad_norm": 0.22918985784053802, "learning_rate": 1e-06, "loss": 0.0299, "step": 1567 }, { "clip_ratio/high_max": 0.0028895750947413035, "clip_ratio/high_mean": 0.001170220049971249, "clip_ratio/low_mean": 0.002253498139907606, "clip_ratio/low_min": 0.0003207621157343965, "clip_ratio/region_mean": 0.003423718226258643, "epoch": 0.14634630080091465, "grad_norm": 0.5339184403419495, "learning_rate": 1e-06, "loss": 0.0363, "step": 1568 }, { "clip_ratio/high_max": 0.0029230715590529144, "clip_ratio/high_mean": 0.001218335317389574, "clip_ratio/low_mean": 0.0010923545087280218, "clip_ratio/low_min": 8.421917482337449e-05, "clip_ratio/region_mean": 0.0023106898515834473, "completions/clipped_ratio": 0.012032645089285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 603.3818359375, "completions/mean_terminated_length": 560.8445434570312, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.14643963390091524, "grad_norm": 28565546.0, "learning_rate": 1e-06, "loss": 2520624.5, "num_tokens": 1135648982.0, "reward": 0.586922824382782, "reward_std": 0.18817292153835297, "rewards/simpleverify_reward/mean": 0.5869227647781372, "rewards/simpleverify_reward/std": 0.4923885464668274, "step": 1569 }, { "clip_ratio/high_max": 0.0030137619651213754, "clip_ratio/high_mean": 0.0012447561293811304, "clip_ratio/low_mean": 0.0010305654795956798, "clip_ratio/low_min": 6.751879845978692e-05, "clip_ratio/region_mean": 0.0022753216107957996, "epoch": 0.14653296700091584, "grad_norm": 21864908800.0, "learning_rate": 1e-06, "loss": 20389784.0, "step": 1570 }, { "clip_ratio/high_max": 0.0028389483777573332, "clip_ratio/high_mean": 0.0012866233519162051, "clip_ratio/low_mean": 0.001145498714322457, "clip_ratio/low_min": 0.00013981657684780657, "clip_ratio/region_mean": 0.002432122055324726, "epoch": 0.1466263001009164, "grad_norm": 3831295232.0, "learning_rate": 1e-06, "loss": 19580633088.0, "step": 1571 }, { "clip_ratio/high_max": 0.0030210322947823443, "clip_ratio/high_mean": 0.0012224624297232367, "clip_ratio/low_mean": 0.001150653581134975, "clip_ratio/low_min": 5.977197906759102e-05, "clip_ratio/region_mean": 0.0023731159963062964, "epoch": 0.146719633200917, "grad_norm": 4588065783808.0, "learning_rate": 1e-06, "loss": 374512222208.0, "step": 1572 }, { "clip_ratio/high_max": 0.0031125456152949482, "clip_ratio/high_mean": 0.0013337968448468018, "clip_ratio/low_mean": 0.0012805607766495086, "clip_ratio/low_min": 0.0001424820329702925, "clip_ratio/region_mean": 0.0026143576542381197, "epoch": 0.14681296630091759, "grad_norm": 416710240.0, "learning_rate": 1e-06, "loss": 2405130.5, "step": 1573 }, { "clip_ratio/high_max": 0.002857889332517516, "clip_ratio/high_mean": 0.001126748313254211, "clip_ratio/low_mean": 0.0012352399389783386, "clip_ratio/low_min": 7.148998702177778e-05, "clip_ratio/region_mean": 0.002361988299526274, "epoch": 0.14690629940091818, "grad_norm": 18308866048.0, "learning_rate": 1e-06, "loss": 3460791808.0, "step": 1574 }, { "clip_ratio/high_max": 0.003078666632063687, "clip_ratio/high_mean": 0.0012404187327774707, "clip_ratio/low_mean": 0.0011807699302153196, "clip_ratio/low_min": 7.42705578886671e-05, "clip_ratio/region_mean": 0.002421188648440875, "epoch": 0.14699963250091874, "grad_norm": 3874119.5, "learning_rate": 1e-06, "loss": 4755.4546, "step": 1575 }, { "clip_ratio/high_max": 0.0034402690434944816, "clip_ratio/high_mean": 0.0013720394690608373, "clip_ratio/low_mean": 0.0011528497743711341, "clip_ratio/low_min": 0.00010688407928682864, "clip_ratio/region_mean": 0.002524889183405321, "epoch": 0.14709296560091933, "grad_norm": 1089571584.0, "learning_rate": 1e-06, "loss": 330477.0625, "step": 1576 }, { "clip_ratio/high_max": 0.0033594313863432035, "clip_ratio/high_mean": 0.0014116493039182387, "clip_ratio/low_mean": 0.0010817144575412385, "clip_ratio/low_min": 8.8118315943575e-05, "clip_ratio/region_mean": 0.0024933637178037316, "epoch": 0.14718629870091993, "grad_norm": 1540312.25, "learning_rate": 1e-06, "loss": 1468.3246, "step": 1577 }, { "clip_ratio/high_max": 0.0029704681073781103, "clip_ratio/high_mean": 0.0011912590744032059, "clip_ratio/low_mean": 0.001314467936026631, "clip_ratio/low_min": 0.00018844029546016827, "clip_ratio/region_mean": 0.0025057270395336673, "epoch": 0.1472796318009205, "grad_norm": 779905.0625, "learning_rate": 1e-06, "loss": 459.175, "step": 1578 }, { "clip_ratio/high_max": 0.003109902267169673, "clip_ratio/high_mean": 0.0013284446140460204, "clip_ratio/low_mean": 0.001349199825199321, "clip_ratio/low_min": 0.00015743750918773003, "clip_ratio/region_mean": 0.002677644391951617, "epoch": 0.14737296490092108, "grad_norm": 9454.1728515625, "learning_rate": 1e-06, "loss": 4.7731, "step": 1579 }, { "clip_ratio/high_max": 0.0032486036943737417, "clip_ratio/high_mean": 0.0012373789340927033, "clip_ratio/low_mean": 0.0013306890250532888, "clip_ratio/low_min": 9.487463466939516e-05, "clip_ratio/region_mean": 0.002568067931861151, "epoch": 0.14746629800092168, "grad_norm": 7601012.0, "learning_rate": 1e-06, "loss": 16110.3232, "step": 1580 }, { "clip_ratio/high_max": 0.0031644735572626814, "clip_ratio/high_mean": 0.0013020262849749997, "clip_ratio/low_mean": 0.0012839754235756118, "clip_ratio/low_min": 9.50353714870289e-05, "clip_ratio/region_mean": 0.0025860017485683784, "epoch": 0.14755963110092224, "grad_norm": 3932926.0, "learning_rate": 1e-06, "loss": 5218361344.0, "step": 1581 }, { "clip_ratio/high_max": 0.002600990643259138, "clip_ratio/high_mean": 0.0011930091750400607, "clip_ratio/low_mean": 0.0011814659137598937, "clip_ratio/low_min": 3.4019445593003184e-05, "clip_ratio/region_mean": 0.002374475145188626, "epoch": 0.14765296420092283, "grad_norm": 34405941248.0, "learning_rate": 1e-06, "loss": 5108135.0, "step": 1582 }, { "clip_ratio/high_max": 0.002748634811723605, "clip_ratio/high_mean": 0.0011759144654206466, "clip_ratio/low_mean": 0.0012018321212963201, "clip_ratio/low_min": 8.07980986792245e-05, "clip_ratio/region_mean": 0.0023777466485626064, "epoch": 0.14774629730092342, "grad_norm": 300797.15625, "learning_rate": 1e-06, "loss": 88.9562, "step": 1583 }, { "clip_ratio/high_max": 0.0027803430348285474, "clip_ratio/high_mean": 0.0012593587089213543, "clip_ratio/low_mean": 0.001418678668414941, "clip_ratio/low_min": 8.06786247267155e-05, "clip_ratio/region_mean": 0.0026780373154906556, "epoch": 0.147839630400924, "grad_norm": 781058.125, "learning_rate": 1e-06, "loss": 150.4753, "step": 1584 }, { "clip_ratio/high_max": 0.0034344434170634486, "clip_ratio/high_mean": 0.0014339090721477987, "clip_ratio/low_mean": 0.001440911215468077, "clip_ratio/low_min": 9.853634946921375e-05, "clip_ratio/region_mean": 0.0028748203185386956, "epoch": 0.14793296350092458, "grad_norm": 511.2007141113281, "learning_rate": 1e-06, "loss": 22437.3926, "step": 1585 }, { "clip_ratio/high_max": 0.0035069282530457713, "clip_ratio/high_mean": 0.0013038595097896177, "clip_ratio/low_mean": 0.001471184914407786, "clip_ratio/low_min": 8.048731160670286e-05, "clip_ratio/region_mean": 0.0027750444060075097, "epoch": 0.14802629660092517, "grad_norm": 13778335.0, "learning_rate": 1e-06, "loss": 3458.4351, "step": 1586 }, { "clip_ratio/high_max": 0.002965187974041328, "clip_ratio/high_mean": 0.0012370891054160893, "clip_ratio/low_mean": 0.0016227386149694212, "clip_ratio/low_min": 0.00019646307919174433, "clip_ratio/region_mean": 0.002859827669453807, "epoch": 0.14811962970092574, "grad_norm": 202.98048400878906, "learning_rate": 1e-06, "loss": 0.1618, "step": 1587 }, { "clip_ratio/high_max": 0.0033403335692128167, "clip_ratio/high_mean": 0.0014088917996559758, "clip_ratio/low_mean": 0.00150203059456544, "clip_ratio/low_min": 0.00012860166680184193, "clip_ratio/region_mean": 0.0029109224342391826, "epoch": 0.14821296280092633, "grad_norm": 19.36944007873535, "learning_rate": 1e-06, "loss": 0.0067, "step": 1588 }, { "clip_ratio/high_max": 0.003234310250263661, "clip_ratio/high_mean": 0.0013366493585635908, "clip_ratio/low_mean": 0.0015972804740158608, "clip_ratio/low_min": 5.11289053974906e-05, "clip_ratio/region_mean": 0.0029339298271224834, "epoch": 0.14830629590092692, "grad_norm": 1305844096.0, "learning_rate": 1e-06, "loss": 269629.9062, "step": 1589 }, { "clip_ratio/high_max": 0.003312863947940059, "clip_ratio/high_mean": 0.001299374347581761, "clip_ratio/low_mean": 0.00151879384793574, "clip_ratio/low_min": 0.00012856240073233494, "clip_ratio/region_mean": 0.002818168250087183, "epoch": 0.14839962900092749, "grad_norm": 185.1660919189453, "learning_rate": 1e-06, "loss": 7960.3071, "step": 1590 }, { "clip_ratio/high_max": 0.003106338561337907, "clip_ratio/high_mean": 0.0012937858373334166, "clip_ratio/low_mean": 0.0016290267922158819, "clip_ratio/low_min": 0.0002096015587085276, "clip_ratio/region_mean": 0.0029228125931695104, "epoch": 0.14849296210092808, "grad_norm": 137292.40625, "learning_rate": 1e-06, "loss": 33.3874, "step": 1591 }, { "clip_ratio/high_max": 0.0033875743101816624, "clip_ratio/high_mean": 0.0015467292578250635, "clip_ratio/low_mean": 0.001573028010170674, "clip_ratio/low_min": 6.822130490036216e-05, "clip_ratio/region_mean": 0.0031197572316159494, "epoch": 0.14858629520092867, "grad_norm": 5721.7216796875, "learning_rate": 1e-06, "loss": 1.5251, "step": 1592 }, { "clip_ratio/high_max": 0.0026431048318045214, "clip_ratio/high_mean": 0.0011660960626613814, "clip_ratio/low_mean": 0.001824152310291538, "clip_ratio/low_min": 0.00036151911535853287, "clip_ratio/region_mean": 0.0029902483511250466, "epoch": 0.14867962830092923, "grad_norm": 988.8856201171875, "learning_rate": 1e-06, "loss": 0.2976, "step": 1593 }, { "clip_ratio/high_max": 0.0030779329972574487, "clip_ratio/high_mean": 0.0012305582458793651, "clip_ratio/low_mean": 0.0016532464760530274, "clip_ratio/low_min": 0.0001464408769606962, "clip_ratio/region_mean": 0.0028838047437602654, "epoch": 0.14877296140092983, "grad_norm": 2.7741458415985107, "learning_rate": 1e-06, "loss": 0.0345, "step": 1594 }, { "clip_ratio/high_max": 0.0035970796379842795, "clip_ratio/high_mean": 0.001583169479999924, "clip_ratio/low_mean": 0.0017590854586160276, "clip_ratio/low_min": 9.158254943031352e-05, "clip_ratio/region_mean": 0.0033422549022361636, "epoch": 0.14886629450093042, "grad_norm": 1480150.5, "learning_rate": 1e-06, "loss": 18602.1855, "step": 1595 }, { "clip_ratio/high_max": 0.0035218798657297157, "clip_ratio/high_mean": 0.0014541324380843434, "clip_ratio/low_mean": 0.0017742020700097783, "clip_ratio/low_min": 0.00017495865540695377, "clip_ratio/region_mean": 0.0032283345353789628, "epoch": 0.148959627600931, "grad_norm": 13392.486328125, "learning_rate": 1e-06, "loss": 16875.0391, "step": 1596 }, { "clip_ratio/high_max": 0.003159449937811587, "clip_ratio/high_mean": 0.0013697167596546933, "clip_ratio/low_mean": 0.0019263198628323153, "clip_ratio/low_min": 5.722316200262867e-05, "clip_ratio/region_mean": 0.0032960366588667966, "epoch": 0.14905296070093157, "grad_norm": 40718.03515625, "learning_rate": 1e-06, "loss": 4.4022, "step": 1597 }, { "clip_ratio/high_max": 0.003392810744117014, "clip_ratio/high_mean": 0.0014693576777062844, "clip_ratio/low_mean": 0.0017080184916267172, "clip_ratio/low_min": 0.00012124370186938904, "clip_ratio/region_mean": 0.0031773761875228956, "epoch": 0.14914629380093217, "grad_norm": 62737612.0, "learning_rate": 1e-06, "loss": 10002.1816, "step": 1598 }, { "clip_ratio/high_max": 0.003099215573456604, "clip_ratio/high_mean": 0.0014748414287168998, "clip_ratio/low_mean": 0.0016316440851369407, "clip_ratio/low_min": 0.00010184631355514284, "clip_ratio/region_mean": 0.00310648548475001, "epoch": 0.14923962690093276, "grad_norm": 2324671.25, "learning_rate": 1e-06, "loss": 2327.3533, "step": 1599 }, { "clip_ratio/high_max": 0.0034614399846759625, "clip_ratio/high_mean": 0.001446096292056609, "clip_ratio/low_mean": 0.0017500548710813746, "clip_ratio/low_min": 8.624988731753547e-05, "clip_ratio/region_mean": 0.003196151214069687, "epoch": 0.14933296000093332, "grad_norm": 15523.39453125, "learning_rate": 1e-06, "loss": 7.2142, "step": 1600 }, { "epoch": 0.14933296000093332, "step": 1600, "total_flos": 0.0, "train_loss": 1264545902427.3843, "train_runtime": 60559.9674, "train_samples_per_second": 23.672, "train_steps_per_second": 0.026 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 1135648982, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }