{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14933296000093332, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013933454241071397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 601.7027587890625, "completions/mean_terminated_length": 552.3270874023438, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.14953841269016266, "learning_rate": 2e-07, "loss": -0.0262, "num_tokens": 80430161.0, "reward": 0.4950823187828064, "reward_std": 0.2626723647117615, "rewards/simpleverify_reward/mean": 0.4950823187828064, "rewards/simpleverify_reward/std": 0.49997806549072266, "step": 1 }, { "clip_ratio/high_max": 0.0023674857075093314, "clip_ratio/high_mean": 0.0009939373121596873, "clip_ratio/low_mean": 0.0006238104006115464, "clip_ratio/low_min": 6.543002746184357e-05, "clip_ratio/region_mean": 0.0016177476973098237, "epoch": 0.00018666620000116666, "grad_norm": 0.16591979563236237, "learning_rate": 2e-07, "loss": 0.0125, "step": 2 }, { "clip_ratio/high_max": 0.0021884036796109285, "clip_ratio/high_mean": 0.0010375111578468932, "clip_ratio/low_mean": 0.0006382719457178609, "clip_ratio/low_min": 8.395045188080985e-05, "clip_ratio/region_mean": 0.001675783089012839, "epoch": 0.00027999930000175, "grad_norm": 0.16085569560527802, "learning_rate": 2e-07, "loss": 0.0068, "step": 3 }, { "clip_ratio/high_max": 0.0021159901880309917, "clip_ratio/high_mean": 0.0009920538850565208, "clip_ratio/low_mean": 0.0005990383360767737, "clip_ratio/low_min": 8.886166506272275e-05, "clip_ratio/region_mean": 0.001591092222952284, "epoch": 0.0003733324000023333, "grad_norm": 0.1453714370727539, "learning_rate": 2e-07, "loss": 0.0124, "step": 4 }, { "clip_ratio/high_max": 0.0024020678029046394, "clip_ratio/high_mean": 0.001057566871168092, "clip_ratio/low_mean": 0.0006378542566380929, "clip_ratio/low_min": 4.592448840412544e-05, "clip_ratio/region_mean": 0.0016954211314441636, "epoch": 0.0004666655000029167, "grad_norm": 0.1448395699262619, "learning_rate": 2e-07, "loss": -0.0184, "step": 5 }, { "clip_ratio/high_max": 0.00210578952828655, "clip_ratio/high_mean": 0.0009746707728481852, "clip_ratio/low_mean": 0.00069473342045967, "clip_ratio/low_min": 6.13690126556321e-05, "clip_ratio/region_mean": 0.0016694041842129081, "epoch": 0.0005599986000035, "grad_norm": 0.13367842137813568, "learning_rate": 2e-07, "loss": 0.0233, "step": 6 }, { "clip_ratio/high_max": 0.0024697580374777317, "clip_ratio/high_mean": 0.0011314713956380729, "clip_ratio/low_mean": 0.0006404539362847572, "clip_ratio/low_min": 0.00011031753092538565, "clip_ratio/region_mean": 0.0017719253592076711, "epoch": 0.0006533317000040833, "grad_norm": 0.15834608674049377, "learning_rate": 2e-07, "loss": -0.0267, "step": 7 }, { "clip_ratio/high_max": 0.002535952859034296, "clip_ratio/high_mean": 0.0012183077778900042, "clip_ratio/low_mean": 0.0005654168326145737, "clip_ratio/low_min": 6.856378513475647e-05, "clip_ratio/region_mean": 0.0017837245904956944, "epoch": 0.0007466648000046666, "grad_norm": 0.14437466859817505, "learning_rate": 2e-07, "loss": -0.0465, "step": 8 }, { "clip_ratio/high_max": 0.0026118963869521394, "clip_ratio/high_mean": 0.0011256630386924371, "clip_ratio/low_mean": 0.000591609572438756, "clip_ratio/low_min": 3.871378703479422e-05, "clip_ratio/region_mean": 0.0017172726074932143, "epoch": 0.00083999790000525, "grad_norm": 0.14675189554691315, "learning_rate": 2e-07, "loss": -0.0058, "step": 9 }, { "clip_ratio/high_max": 0.0019322549269418232, "clip_ratio/high_mean": 0.0008848520192259457, "clip_ratio/low_mean": 0.0005785529883723939, "clip_ratio/low_min": 2.1938327336101793e-05, "clip_ratio/region_mean": 0.0014634050130553078, "epoch": 0.0009333310000058334, "grad_norm": 0.13194601237773895, "learning_rate": 2e-07, "loss": 0.0501, "step": 10 }, { "clip_ratio/high_max": 0.0020083620183868334, "clip_ratio/high_mean": 0.000981784389296081, "clip_ratio/low_mean": 0.0006944317246961873, "clip_ratio/low_min": 6.862139434815617e-05, "clip_ratio/region_mean": 0.0016762160885264166, "epoch": 0.0010266641000064166, "grad_norm": 0.14319021999835968, "learning_rate": 2e-07, "loss": 0.0196, "step": 11 }, { "clip_ratio/high_max": 0.002448038212605752, "clip_ratio/high_mean": 0.0011525647059897892, "clip_ratio/low_mean": 0.0005976697352707561, "clip_ratio/low_min": 1.5153352251218166e-05, "clip_ratio/region_mean": 0.001750234452629229, "epoch": 0.001119997200007, "grad_norm": 0.16065634787082672, "learning_rate": 2e-07, "loss": 0.0127, "step": 12 }, { "clip_ratio/high_max": 0.0023032343451632187, "clip_ratio/high_mean": 0.001108757896872703, "clip_ratio/low_mean": 0.0005706423480660305, "clip_ratio/low_min": 4.988859018340008e-05, "clip_ratio/region_mean": 0.001679400258581154, "epoch": 0.0012133303000075833, "grad_norm": 0.1447317898273468, "learning_rate": 2e-07, "loss": -0.0149, "step": 13 }, { "clip_ratio/high_max": 0.002321246785868425, "clip_ratio/high_mean": 0.0010930707976513077, "clip_ratio/low_mean": 0.0006298797507042764, "clip_ratio/low_min": 5.399934616434621e-05, "clip_ratio/region_mean": 0.0017229505247087218, "epoch": 0.0013066634000081666, "grad_norm": 0.1313619613647461, "learning_rate": 2e-07, "loss": -0.0108, "step": 14 }, { "clip_ratio/high_max": 0.002160714073397685, "clip_ratio/high_mean": 0.0009533318316243822, "clip_ratio/low_mean": 0.0005689996214641724, "clip_ratio/low_min": 7.197210106824059e-05, "clip_ratio/region_mean": 0.0015223314840113744, "epoch": 0.00139999650000875, "grad_norm": 0.1434275507926941, "learning_rate": 2e-07, "loss": 0.0045, "step": 15 }, { "clip_ratio/high_max": 0.0019111685505777132, "clip_ratio/high_mean": 0.0008887380445230519, "clip_ratio/low_mean": 0.0006624179059144808, "clip_ratio/low_min": 1.147315288108075e-05, "clip_ratio/region_mean": 0.0015511559322476387, "epoch": 0.0014933296000093333, "grad_norm": 0.13989700376987457, "learning_rate": 2e-07, "loss": 0.0664, "step": 16 }, { "clip_ratio/high_max": 0.0026097571462742053, "clip_ratio/high_mean": 0.0009490021038800478, "clip_ratio/low_mean": 0.0006101581711845938, "clip_ratio/low_min": 9.664449862611946e-06, "clip_ratio/region_mean": 0.0015591602568747476, "epoch": 0.0015866627000099165, "grad_norm": 0.1435522586107254, "learning_rate": 2e-07, "loss": -0.0178, "step": 17 }, { "clip_ratio/high_max": 0.0019821005407720804, "clip_ratio/high_mean": 0.0009335906197520671, "clip_ratio/low_mean": 0.000633852641840349, "clip_ratio/low_min": 5.833512113895267e-05, "clip_ratio/region_mean": 0.001567443265230395, "epoch": 0.0016799958000105, "grad_norm": 0.14138367772102356, "learning_rate": 2e-07, "loss": -0.0139, "step": 18 }, { "clip_ratio/high_max": 0.0025178020441671833, "clip_ratio/high_mean": 0.0010550906445132568, "clip_ratio/low_mean": 0.0005183770508665475, "clip_ratio/low_min": 1.2480031728046015e-05, "clip_ratio/region_mean": 0.0015734676635474898, "epoch": 0.0017733289000110833, "grad_norm": 0.1399834007024765, "learning_rate": 2e-07, "loss": -0.0378, "step": 19 }, { "clip_ratio/high_max": 0.002387506792729255, "clip_ratio/high_mean": 0.0010503932608116884, "clip_ratio/low_mean": 0.000715991109245806, "clip_ratio/low_min": 8.918568164517637e-05, "clip_ratio/region_mean": 0.0017663843827904202, "epoch": 0.0018666620000116667, "grad_norm": 0.15333129465579987, "learning_rate": 2e-07, "loss": 0.0301, "step": 20 }, { "clip_ratio/high_max": 0.002245577925350517, "clip_ratio/high_mean": 0.0009637679431762081, "clip_ratio/low_mean": 0.0006616623913942021, "clip_ratio/low_min": 6.358617520163534e-05, "clip_ratio/region_mean": 0.001625430340936873, "epoch": 0.00195999510001225, "grad_norm": 0.14982850849628448, "learning_rate": 2e-07, "loss": 0.0291, "step": 21 }, { "clip_ratio/high_max": 0.0019911075214622542, "clip_ratio/high_mean": 0.0009537351870676503, "clip_ratio/low_mean": 0.0007553755731350975, "clip_ratio/low_min": 7.43609730307071e-05, "clip_ratio/region_mean": 0.001709110765659716, "epoch": 0.002053328200012833, "grad_norm": 0.14227502048015594, "learning_rate": 2e-07, "loss": 0.0702, "step": 22 }, { "clip_ratio/high_max": 0.0021435838352772407, "clip_ratio/high_mean": 0.0010525440629862715, "clip_ratio/low_mean": 0.000745167237255373, "clip_ratio/low_min": 6.530354949063621e-05, "clip_ratio/region_mean": 0.0017977112947846763, "epoch": 0.0021466613000134167, "grad_norm": 0.15410882234573364, "learning_rate": 2e-07, "loss": 0.0263, "step": 23 }, { "clip_ratio/high_max": 0.001867049442807911, "clip_ratio/high_mean": 0.0008090599167189794, "clip_ratio/low_mean": 0.0007187115988926962, "clip_ratio/low_min": 8.171320223482326e-05, "clip_ratio/region_mean": 0.0015277715574484318, "epoch": 0.002239994400014, "grad_norm": 0.16155502200126648, "learning_rate": 2e-07, "loss": 0.064, "step": 24 }, { "clip_ratio/high_max": 0.002423106889182236, "clip_ratio/high_mean": 0.0010662513122952078, "clip_ratio/low_mean": 0.0007340449556068052, "clip_ratio/low_min": 7.112937873898773e-05, "clip_ratio/region_mean": 0.0018002962897298858, "epoch": 0.002333327500014583, "grad_norm": 0.17242395877838135, "learning_rate": 2e-07, "loss": -0.0207, "step": 25 }, { "clip_ratio/high_max": 0.002586542977951467, "clip_ratio/high_mean": 0.001085506853996776, "clip_ratio/low_mean": 0.0007136358653951902, "clip_ratio/low_min": 1.0444518920849077e-05, "clip_ratio/region_mean": 0.0017991427084780298, "epoch": 0.0024266606000151666, "grad_norm": 0.18845485150814056, "learning_rate": 2e-07, "loss": 0.0296, "step": 26 }, { "clip_ratio/high_max": 0.0023713085320196114, "clip_ratio/high_mean": 0.0009943550103344023, "clip_ratio/low_mean": 0.0006678636436845409, "clip_ratio/low_min": 9.117938679992221e-05, "clip_ratio/region_mean": 0.0016622186740278266, "epoch": 0.00251999370001575, "grad_norm": 0.17658445239067078, "learning_rate": 2e-07, "loss": 0.0373, "step": 27 }, { "clip_ratio/high_max": 0.0025220852476195432, "clip_ratio/high_mean": 0.0010478980548214167, "clip_ratio/low_mean": 0.0006360245952237165, "clip_ratio/low_min": 3.55375468643615e-05, "clip_ratio/region_mean": 0.0016839226373122074, "epoch": 0.002613326800016333, "grad_norm": 0.1478639543056488, "learning_rate": 2e-07, "loss": 0.0018, "step": 28 }, { "clip_ratio/high_max": 0.002461256823153235, "clip_ratio/high_mean": 0.0010364703557570465, "clip_ratio/low_mean": 0.0006398533205356216, "clip_ratio/low_min": 6.13540842095972e-05, "clip_ratio/region_mean": 0.0016763236781116575, "epoch": 0.0027066599000169166, "grad_norm": 0.18225984275341034, "learning_rate": 2e-07, "loss": 0.0185, "step": 29 }, { "clip_ratio/high_max": 0.0022175155536388047, "clip_ratio/high_mean": 0.0009533763677609386, "clip_ratio/low_mean": 0.0007033304373180727, "clip_ratio/low_min": 7.825453212717548e-05, "clip_ratio/region_mean": 0.0016567068196309265, "epoch": 0.0027999930000175, "grad_norm": 0.16387026011943817, "learning_rate": 2e-07, "loss": 0.0298, "step": 30 }, { "clip_ratio/high_max": 0.0026038878422696143, "clip_ratio/high_mean": 0.0011194687758688815, "clip_ratio/low_mean": 0.0006538700235978467, "clip_ratio/low_min": 6.658352776867105e-05, "clip_ratio/region_mean": 0.00177333879400976, "epoch": 0.002893326100018083, "grad_norm": 0.18538165092468262, "learning_rate": 2e-07, "loss": 0.0081, "step": 31 }, { "clip_ratio/high_max": 0.0023883673711679876, "clip_ratio/high_mean": 0.0010355769118177705, "clip_ratio/low_mean": 0.0007208803926914698, "clip_ratio/low_min": 3.0472242997348076e-05, "clip_ratio/region_mean": 0.0017564572917763144, "epoch": 0.0029866592000186666, "grad_norm": 0.16681787371635437, "learning_rate": 2e-07, "loss": 0.0573, "step": 32 }, { "clip_ratio/high_max": 0.002104593542753719, "clip_ratio/high_mean": 0.0010913532332779141, "clip_ratio/low_mean": 0.0007352181401074631, "clip_ratio/low_min": 7.196445312729338e-05, "clip_ratio/region_mean": 0.001826571358833462, "epoch": 0.00307999230001925, "grad_norm": 0.14422070980072021, "learning_rate": 2e-07, "loss": 0.0205, "step": 33 }, { "clip_ratio/high_max": 0.0021195796398387756, "clip_ratio/high_mean": 0.0009154372000921285, "clip_ratio/low_mean": 0.000656920085020829, "clip_ratio/low_min": 0.0001107642665374442, "clip_ratio/region_mean": 0.001572357265104074, "epoch": 0.003173325400019833, "grad_norm": 0.1587039828300476, "learning_rate": 2e-07, "loss": 0.0571, "step": 34 }, { "clip_ratio/high_max": 0.0023674454096180853, "clip_ratio/high_mean": 0.0010550709630479105, "clip_ratio/low_mean": 0.0006784659908589674, "clip_ratio/low_min": 8.873475235304795e-05, "clip_ratio/region_mean": 0.0017335369120701216, "epoch": 0.0032666585000204165, "grad_norm": 0.16123159229755402, "learning_rate": 2e-07, "loss": 0.0068, "step": 35 }, { "clip_ratio/high_max": 0.002620395869598724, "clip_ratio/high_mean": 0.0011347207546350546, "clip_ratio/low_mean": 0.0006833997340436326, "clip_ratio/low_min": 5.905487614654703e-05, "clip_ratio/region_mean": 0.0018181204359279945, "epoch": 0.003359991600021, "grad_norm": 0.1757831871509552, "learning_rate": 2e-07, "loss": 0.0076, "step": 36 }, { "clip_ratio/high_max": 0.0022089821359259076, "clip_ratio/high_mean": 0.0009705943375593051, "clip_ratio/low_mean": 0.000560308015337796, "clip_ratio/low_min": 5.188483737583738e-05, "clip_ratio/region_mean": 0.0015309023292502388, "epoch": 0.0034533247000215835, "grad_norm": 0.18159349262714386, "learning_rate": 2e-07, "loss": 0.0043, "step": 37 }, { "clip_ratio/high_max": 0.0025637145117798354, "clip_ratio/high_mean": 0.001121771392718074, "clip_ratio/low_mean": 0.0006521710820379667, "clip_ratio/low_min": 5.358485850592842e-05, "clip_ratio/region_mean": 0.0017739424729370512, "epoch": 0.0035466578000221665, "grad_norm": 0.1653040200471878, "learning_rate": 2e-07, "loss": -0.0042, "step": 38 }, { "clip_ratio/high_max": 0.0021728387437178753, "clip_ratio/high_mean": 0.001027424295898527, "clip_ratio/low_mean": 0.0007209866234916262, "clip_ratio/low_min": 9.615711951482808e-05, "clip_ratio/region_mean": 0.0017484109484939836, "epoch": 0.00363999090002275, "grad_norm": 0.16213110089302063, "learning_rate": 2e-07, "loss": 0.0103, "step": 39 }, { "clip_ratio/high_max": 0.0021183517455938272, "clip_ratio/high_mean": 0.0008936427548178472, "clip_ratio/low_mean": 0.0009624898648326052, "clip_ratio/low_min": 5.6695236708037555e-05, "clip_ratio/region_mean": 0.001856132599641569, "epoch": 0.0037333240000233334, "grad_norm": 0.12478988617658615, "learning_rate": 2e-07, "loss": 0.0472, "step": 40 }, { "clip_ratio/high_max": 0.0021446100327011663, "clip_ratio/high_mean": 0.0010357052451581694, "clip_ratio/low_mean": 0.0007963624830154004, "clip_ratio/low_min": 8.249898837675573e-05, "clip_ratio/region_mean": 0.0018320677627343684, "epoch": 0.0038266571000239165, "grad_norm": 0.1299549639225006, "learning_rate": 2e-07, "loss": 0.0334, "step": 41 }, { "clip_ratio/high_max": 0.002538864970119903, "clip_ratio/high_mean": 0.0010702273939386941, "clip_ratio/low_mean": 0.0009539508901070803, "clip_ratio/low_min": 0.0001460920484532835, "clip_ratio/region_mean": 0.002024178254941944, "epoch": 0.0039199902000245, "grad_norm": 0.1413993090391159, "learning_rate": 2e-07, "loss": 0.0193, "step": 42 }, { "clip_ratio/high_max": 0.00302773411385715, "clip_ratio/high_mean": 0.0012631381869141478, "clip_ratio/low_mean": 0.0007151913468987914, "clip_ratio/low_min": 7.079160786815919e-05, "clip_ratio/region_mean": 0.001978329528355971, "epoch": 0.004013323300025083, "grad_norm": 0.12021080404520035, "learning_rate": 2e-07, "loss": -0.0386, "step": 43 }, { "clip_ratio/high_max": 0.0023914417688502, "clip_ratio/high_mean": 0.0010761782687040977, "clip_ratio/low_mean": 0.0008260987979156198, "clip_ratio/low_min": 0.00010776017188618425, "clip_ratio/region_mean": 0.00190227702842094, "epoch": 0.004106656400025666, "grad_norm": 0.1382819563150406, "learning_rate": 2e-07, "loss": -0.0023, "step": 44 }, { "clip_ratio/high_max": 0.002267268253490329, "clip_ratio/high_mean": 0.0010841776711458806, "clip_ratio/low_mean": 0.0008521507879777346, "clip_ratio/low_min": 0.00012050447639921913, "clip_ratio/region_mean": 0.0019363284955034032, "epoch": 0.00419998950002625, "grad_norm": 0.16588380932807922, "learning_rate": 2e-07, "loss": 0.0398, "step": 45 }, { "clip_ratio/high_max": 0.002457712238538079, "clip_ratio/high_mean": 0.0011450489255366847, "clip_ratio/low_mean": 0.0007951563256938243, "clip_ratio/low_min": 0.00013263345408631722, "clip_ratio/region_mean": 0.001940205242135562, "epoch": 0.004293322600026833, "grad_norm": 0.14558298885822296, "learning_rate": 2e-07, "loss": 0.013, "step": 46 }, { "clip_ratio/high_max": 0.0022139805550978053, "clip_ratio/high_mean": 0.000999439473162056, "clip_ratio/low_mean": 0.0008757558680372313, "clip_ratio/low_min": 0.000175554148881929, "clip_ratio/region_mean": 0.0018751953502942342, "epoch": 0.004386655700027417, "grad_norm": 0.17660871148109436, "learning_rate": 2e-07, "loss": 0.0469, "step": 47 }, { "clip_ratio/high_max": 0.0027948246570304036, "clip_ratio/high_mean": 0.0011936235314351507, "clip_ratio/low_mean": 0.0007576054340461269, "clip_ratio/low_min": 7.088794109222363e-05, "clip_ratio/region_mean": 0.0019512289145495743, "epoch": 0.004479988800028, "grad_norm": 0.17675215005874634, "learning_rate": 2e-07, "loss": -0.0025, "step": 48 }, { "clip_ratio/high_max": 0.002381072859861888, "clip_ratio/high_mean": 0.0009864217863650993, "clip_ratio/low_mean": 0.0007424233099300181, "clip_ratio/low_min": 8.64379644553992e-05, "clip_ratio/region_mean": 0.00172884514904581, "epoch": 0.004573321900028583, "grad_norm": 0.1627051830291748, "learning_rate": 2e-07, "loss": 0.0385, "step": 49 }, { "clip_ratio/high_max": 0.002706680417759344, "clip_ratio/high_mean": 0.0011382067441445542, "clip_ratio/low_mean": 0.0008244439577538287, "clip_ratio/low_min": 8.638664075988345e-05, "clip_ratio/region_mean": 0.001962650756468065, "epoch": 0.004666655000029166, "grad_norm": 0.16825319826602936, "learning_rate": 2e-07, "loss": -0.0247, "step": 50 }, { "clip_ratio/high_max": 0.0023495928835473023, "clip_ratio/high_mean": 0.0011310262416373007, "clip_ratio/low_mean": 0.0007959395843499806, "clip_ratio/low_min": 8.145416813931661e-05, "clip_ratio/region_mean": 0.0019269658223493025, "epoch": 0.00475998810002975, "grad_norm": 0.17190200090408325, "learning_rate": 2e-07, "loss": -0.0028, "step": 51 }, { "clip_ratio/high_max": 0.0025433526243432425, "clip_ratio/high_mean": 0.0010059376454591984, "clip_ratio/low_mean": 0.0007976649030752014, "clip_ratio/low_min": 0.00011158106644870713, "clip_ratio/region_mean": 0.0018036025830951985, "epoch": 0.004853321200030333, "grad_norm": 0.16411852836608887, "learning_rate": 2e-07, "loss": 0.0346, "step": 52 }, { "clip_ratio/high_max": 0.002570785647549201, "clip_ratio/high_mean": 0.0011279989157628734, "clip_ratio/low_mean": 0.0008269209793070331, "clip_ratio/low_min": 0.0001437667115169461, "clip_ratio/region_mean": 0.0019549198914319277, "epoch": 0.004946654300030917, "grad_norm": 0.23368988931179047, "learning_rate": 2e-07, "loss": 0.0347, "step": 53 }, { "clip_ratio/high_max": 0.00221728650649311, "clip_ratio/high_mean": 0.0010440790247230325, "clip_ratio/low_mean": 0.0008208208382711746, "clip_ratio/low_min": 0.00015387419443868566, "clip_ratio/region_mean": 0.001864899881184101, "epoch": 0.0050399874000315, "grad_norm": 0.16691303253173828, "learning_rate": 2e-07, "loss": 0.0353, "step": 54 }, { "clip_ratio/high_max": 0.0021591855911538005, "clip_ratio/high_mean": 0.0009948813494702335, "clip_ratio/low_mean": 0.0007651457453903276, "clip_ratio/low_min": 7.640478452231036e-05, "clip_ratio/region_mean": 0.0017600271021365188, "epoch": 0.005133320500032083, "grad_norm": 0.19582928717136383, "learning_rate": 2e-07, "loss": 0.0382, "step": 55 }, { "clip_ratio/high_max": 0.002834614584571682, "clip_ratio/high_mean": 0.0011490672804939095, "clip_ratio/low_mean": 0.000831111219667946, "clip_ratio/low_min": 0.00013497215422830777, "clip_ratio/region_mean": 0.0019801784947048873, "epoch": 0.005226653600032666, "grad_norm": 0.16688285768032074, "learning_rate": 2e-07, "loss": 0.0097, "step": 56 }, { "clip_ratio/high_max": 0.0026971597981173545, "clip_ratio/high_mean": 0.0011881121790793259, "clip_ratio/low_mean": 0.0008297745735035278, "clip_ratio/low_min": 0.00011190869054189534, "clip_ratio/region_mean": 0.002017886727117002, "epoch": 0.00531998670003325, "grad_norm": 0.18028266727924347, "learning_rate": 2e-07, "loss": 0.0035, "step": 57 }, { "clip_ratio/high_max": 0.0023480103263864294, "clip_ratio/high_mean": 0.0010957255362882279, "clip_ratio/low_mean": 0.0008130651513056364, "clip_ratio/low_min": 7.304040173039539e-05, "clip_ratio/region_mean": 0.0019087906766799279, "epoch": 0.005413319800033833, "grad_norm": 0.19899523258209229, "learning_rate": 2e-07, "loss": 0.0106, "step": 58 }, { "clip_ratio/high_max": 0.002623686406877823, "clip_ratio/high_mean": 0.0010664626006473554, "clip_ratio/low_mean": 0.0008946610705606872, "clip_ratio/low_min": 0.00011749729765142547, "clip_ratio/region_mean": 0.0019611237148637883, "epoch": 0.005506652900034417, "grad_norm": 0.2230866253376007, "learning_rate": 2e-07, "loss": 0.0208, "step": 59 }, { "clip_ratio/high_max": 0.0027665546294883825, "clip_ratio/high_mean": 0.0011527124734129757, "clip_ratio/low_mean": 0.0009560649377817754, "clip_ratio/low_min": 7.160534823924536e-05, "clip_ratio/region_mean": 0.0021087773420731537, "epoch": 0.005599986000035, "grad_norm": 0.20928668975830078, "learning_rate": 2e-07, "loss": 0.0183, "step": 60 }, { "clip_ratio/high_max": 0.0025118674675468355, "clip_ratio/high_mean": 0.0012051414450979792, "clip_ratio/low_mean": 0.0008169338907464407, "clip_ratio/low_min": 8.125139174808282e-05, "clip_ratio/region_mean": 0.0020220753285684623, "epoch": 0.005693319100035584, "grad_norm": 0.19858625531196594, "learning_rate": 2e-07, "loss": 0.0223, "step": 61 }, { "clip_ratio/high_max": 0.0023063628177624196, "clip_ratio/high_mean": 0.000974755666902638, "clip_ratio/low_mean": 0.0009981992589018773, "clip_ratio/low_min": 0.00015467244065803243, "clip_ratio/region_mean": 0.0019729549676412717, "epoch": 0.005786652200036166, "grad_norm": 0.24347659945487976, "learning_rate": 2e-07, "loss": 0.082, "step": 62 }, { "clip_ratio/high_max": 0.0023434290196746588, "clip_ratio/high_mean": 0.0010790873930091038, "clip_ratio/low_mean": 0.0009798472019610927, "clip_ratio/low_min": 0.00010796214883157518, "clip_ratio/region_mean": 0.0020589346531778574, "epoch": 0.00587998530003675, "grad_norm": 0.19212107360363007, "learning_rate": 2e-07, "loss": 0.0528, "step": 63 }, { "clip_ratio/high_max": 0.0021897237020311877, "clip_ratio/high_mean": 0.0009583683568052948, "clip_ratio/low_mean": 0.0008091209365375107, "clip_ratio/low_min": 2.683438106032554e-05, "clip_ratio/region_mean": 0.001767489273333922, "epoch": 0.005973318400037333, "grad_norm": 0.20925861597061157, "learning_rate": 2e-07, "loss": 0.0446, "step": 64 }, { "clip_ratio/high_max": 0.002384451530815568, "clip_ratio/high_mean": 0.0010315646941307932, "clip_ratio/low_mean": 0.0008973966396297328, "clip_ratio/low_min": 0.0001905183048620529, "clip_ratio/region_mean": 0.001928961333760526, "epoch": 0.006066651500037917, "grad_norm": 0.20721594989299774, "learning_rate": 2e-07, "loss": 0.0384, "step": 65 }, { "clip_ratio/high_max": 0.002292082950589247, "clip_ratio/high_mean": 0.0011816332225862425, "clip_ratio/low_mean": 0.0008940716670622351, "clip_ratio/low_min": 0.0001223996837325103, "clip_ratio/region_mean": 0.0020757048987434246, "epoch": 0.0061599846000385, "grad_norm": 0.18144945800304413, "learning_rate": 2e-07, "loss": 0.0435, "step": 66 }, { "clip_ratio/high_max": 0.00263256916514365, "clip_ratio/high_mean": 0.0012009260281047318, "clip_ratio/low_mean": 0.0010232093536615139, "clip_ratio/low_min": 0.00014230040051188553, "clip_ratio/region_mean": 0.002224135365395341, "epoch": 0.0062533177000390835, "grad_norm": 0.17394821345806122, "learning_rate": 2e-07, "loss": 0.0125, "step": 67 }, { "clip_ratio/high_max": 0.0025809501530602574, "clip_ratio/high_mean": 0.0011921191107830964, "clip_ratio/low_mean": 0.0008526073615939822, "clip_ratio/low_min": 0.00013056372790742898, "clip_ratio/region_mean": 0.0020447264687391, "epoch": 0.006346650800039666, "grad_norm": 0.23051761090755463, "learning_rate": 2e-07, "loss": 0.002, "step": 68 }, { "clip_ratio/high_max": 0.0025625708913139533, "clip_ratio/high_mean": 0.0011680446605168981, "clip_ratio/low_mean": 0.0009182084868371021, "clip_ratio/low_min": 0.0001425448208465241, "clip_ratio/region_mean": 0.0020862531673628837, "epoch": 0.00643998390004025, "grad_norm": 0.1754886507987976, "learning_rate": 2e-07, "loss": 0.0332, "step": 69 }, { "clip_ratio/high_max": 0.0025144152823486365, "clip_ratio/high_mean": 0.0012188743203296326, "clip_ratio/low_mean": 0.0008123854950099485, "clip_ratio/low_min": 5.107884408062091e-05, "clip_ratio/region_mean": 0.002031259769864846, "epoch": 0.006533317000040833, "grad_norm": 0.1733425408601761, "learning_rate": 2e-07, "loss": -0.0094, "step": 70 }, { "clip_ratio/high_max": 0.0026027825952041894, "clip_ratio/high_mean": 0.0011562062136363238, "clip_ratio/low_mean": 0.0008388588776142569, "clip_ratio/low_min": 8.64647195157886e-05, "clip_ratio/region_mean": 0.0019950651258113794, "epoch": 0.0066266501000414165, "grad_norm": 0.15402637422084808, "learning_rate": 2e-07, "loss": -0.0268, "step": 71 }, { "clip_ratio/high_max": 0.0027323203757987358, "clip_ratio/high_mean": 0.0011127118887088727, "clip_ratio/low_mean": 0.0009276796990889125, "clip_ratio/low_min": 0.00011783379432017682, "clip_ratio/region_mean": 0.002040391605987679, "epoch": 0.006719983200042, "grad_norm": 0.14106644690036774, "learning_rate": 2e-07, "loss": 0.0246, "step": 72 }, { "clip_ratio/high_max": 0.0025552997904014774, "clip_ratio/high_mean": 0.0012162266830273438, "clip_ratio/low_mean": 0.0010613461454340722, "clip_ratio/low_min": 4.4688086290989304e-05, "clip_ratio/region_mean": 0.0022775727848056704, "epoch": 0.0068133163000425835, "grad_norm": 0.1692935973405838, "learning_rate": 2e-07, "loss": 0.0199, "step": 73 }, { "clip_ratio/high_max": 0.002438432005874347, "clip_ratio/high_mean": 0.0010961007137666456, "clip_ratio/low_mean": 0.0009506063834123779, "clip_ratio/low_min": 9.589870523996069e-05, "clip_ratio/region_mean": 0.002046707144472748, "epoch": 0.006906649400043167, "grad_norm": 0.13591258227825165, "learning_rate": 2e-07, "loss": -0.0085, "step": 74 }, { "clip_ratio/high_max": 0.002877588711271528, "clip_ratio/high_mean": 0.001249594170076307, "clip_ratio/low_mean": 0.0009949232189683244, "clip_ratio/low_min": 0.00010380656294728396, "clip_ratio/region_mean": 0.0022445174035965465, "epoch": 0.0069999825000437495, "grad_norm": 0.15854483842849731, "learning_rate": 2e-07, "loss": 0.0233, "step": 75 }, { "clip_ratio/high_max": 0.0024982274771900848, "clip_ratio/high_mean": 0.0012117296500946395, "clip_ratio/low_mean": 0.0010544225078774616, "clip_ratio/low_min": 0.00010851422484847717, "clip_ratio/region_mean": 0.0022661521143163554, "epoch": 0.007093315600044333, "grad_norm": 0.1535879522562027, "learning_rate": 2e-07, "loss": -0.0224, "step": 76 }, { "clip_ratio/high_max": 0.002386713145824615, "clip_ratio/high_mean": 0.0011644631485978607, "clip_ratio/low_mean": 0.0010858478963200469, "clip_ratio/low_min": 0.00015373233145510312, "clip_ratio/region_mean": 0.002250311074021738, "epoch": 0.0071866487000449165, "grad_norm": 0.1576538383960724, "learning_rate": 2e-07, "loss": 0.0255, "step": 77 }, { "clip_ratio/high_max": 0.002220553367806133, "clip_ratio/high_mean": 0.0010531980151426978, "clip_ratio/low_mean": 0.0011598556411627214, "clip_ratio/low_min": 0.0002525006675568875, "clip_ratio/region_mean": 0.0022130536526674405, "epoch": 0.0072799818000455, "grad_norm": 0.17964158952236176, "learning_rate": 2e-07, "loss": 0.0435, "step": 78 }, { "clip_ratio/high_max": 0.002580029242380988, "clip_ratio/high_mean": 0.001158521707111504, "clip_ratio/low_mean": 0.0011037067124561872, "clip_ratio/low_min": 0.00012353784586593974, "clip_ratio/region_mean": 0.002262228386825882, "epoch": 0.007373314900046083, "grad_norm": 0.16844771802425385, "learning_rate": 2e-07, "loss": 0.0034, "step": 79 }, { "clip_ratio/high_max": 0.0026624569291016087, "clip_ratio/high_mean": 0.0011551221468835138, "clip_ratio/low_mean": 0.001111339279304957, "clip_ratio/low_min": 0.000121137491078116, "clip_ratio/region_mean": 0.002266461400722619, "epoch": 0.007466648000046667, "grad_norm": 0.13643594086170197, "learning_rate": 2e-07, "loss": -0.0177, "step": 80 }, { "clip_ratio/high_max": 0.0027351634416845627, "clip_ratio/high_mean": 0.0012743008373945486, "clip_ratio/low_mean": 0.001009280480502639, "clip_ratio/low_min": 0.00012832949869334698, "clip_ratio/region_mean": 0.002283581306983251, "epoch": 0.0075599811000472495, "grad_norm": 0.1346885859966278, "learning_rate": 2e-07, "loss": 0.0206, "step": 81 }, { "clip_ratio/high_max": 0.002482824827893637, "clip_ratio/high_mean": 0.0011054424139729235, "clip_ratio/low_mean": 0.0008900915636331774, "clip_ratio/low_min": 9.853973824647255e-05, "clip_ratio/region_mean": 0.0019955339885200374, "epoch": 0.007653314200047833, "grad_norm": 0.17153213918209076, "learning_rate": 2e-07, "loss": 0.0439, "step": 82 }, { "clip_ratio/high_max": 0.0027423274441389367, "clip_ratio/high_mean": 0.0010962784908770118, "clip_ratio/low_mean": 0.0010538176647969522, "clip_ratio/low_min": 0.0001511997870693449, "clip_ratio/region_mean": 0.0021500961593119428, "epoch": 0.007746647300048416, "grad_norm": 0.2059258222579956, "learning_rate": 2e-07, "loss": 0.0421, "step": 83 }, { "clip_ratio/high_max": 0.002741073549259454, "clip_ratio/high_mean": 0.0012883245362900198, "clip_ratio/low_mean": 0.0011239956693316344, "clip_ratio/low_min": 0.00011476413237687666, "clip_ratio/region_mean": 0.0024123201801558025, "epoch": 0.007839980400049, "grad_norm": 0.1896880716085434, "learning_rate": 2e-07, "loss": 0.0026, "step": 84 }, { "clip_ratio/high_max": 0.002554102691647131, "clip_ratio/high_mean": 0.0010734221432358027, "clip_ratio/low_mean": 0.0011634019265329698, "clip_ratio/low_min": 9.160513491224265e-05, "clip_ratio/region_mean": 0.002236824067949783, "epoch": 0.007933313500049582, "grad_norm": 0.20716962218284607, "learning_rate": 2e-07, "loss": 0.0526, "step": 85 }, { "clip_ratio/high_max": 0.002843539434252307, "clip_ratio/high_mean": 0.0013428274978650734, "clip_ratio/low_mean": 0.0010562507377471775, "clip_ratio/low_min": 0.00012423711450537667, "clip_ratio/region_mean": 0.0023990782283362933, "epoch": 0.008026646600050166, "grad_norm": 0.21359005570411682, "learning_rate": 2e-07, "loss": -0.0102, "step": 86 }, { "clip_ratio/high_max": 0.0025335026111861225, "clip_ratio/high_mean": 0.0011242671971558593, "clip_ratio/low_mean": 0.001072051458322676, "clip_ratio/low_min": 0.00014780018227611436, "clip_ratio/region_mean": 0.002196318717324175, "epoch": 0.00811997970005075, "grad_norm": 0.18297268450260162, "learning_rate": 2e-07, "loss": 0.0414, "step": 87 }, { "clip_ratio/high_max": 0.003101074842561502, "clip_ratio/high_mean": 0.0013181200993130915, "clip_ratio/low_mean": 0.0010754356235338491, "clip_ratio/low_min": 0.00012925146711495472, "clip_ratio/region_mean": 0.002393555703747552, "epoch": 0.008213312800051333, "grad_norm": 0.1821269690990448, "learning_rate": 2e-07, "loss": 0.0263, "step": 88 }, { "clip_ratio/high_max": 0.0029333929778658785, "clip_ratio/high_mean": 0.001314161727350438, "clip_ratio/low_mean": 0.0011944041834794916, "clip_ratio/low_min": 0.00015262290435202885, "clip_ratio/region_mean": 0.0025085660236072727, "epoch": 0.008306645900051916, "grad_norm": 0.2102108597755432, "learning_rate": 2e-07, "loss": 0.0119, "step": 89 }, { "clip_ratio/high_max": 0.0030170710451784544, "clip_ratio/high_mean": 0.001282726396311773, "clip_ratio/low_mean": 0.0010507773695280775, "clip_ratio/low_min": 0.0001308621458520065, "clip_ratio/region_mean": 0.0023335037330980413, "epoch": 0.0083999790000525, "grad_norm": 0.15292580425739288, "learning_rate": 2e-07, "loss": -0.0058, "step": 90 }, { "clip_ratio/high_max": 0.0030129971128189936, "clip_ratio/high_mean": 0.0013702320720767602, "clip_ratio/low_mean": 0.001032977601425955, "clip_ratio/low_min": 0.00014014928092365153, "clip_ratio/region_mean": 0.0024032096698647365, "epoch": 0.008493312100053083, "grad_norm": 0.16533470153808594, "learning_rate": 2e-07, "loss": -0.0434, "step": 91 }, { "clip_ratio/high_max": 0.0023833426675992087, "clip_ratio/high_mean": 0.001035780762322247, "clip_ratio/low_mean": 0.0010435842268634588, "clip_ratio/low_min": 6.538681736856233e-05, "clip_ratio/region_mean": 0.0020793649237020873, "epoch": 0.008586645200053667, "grad_norm": 0.16801509261131287, "learning_rate": 2e-07, "loss": 0.0787, "step": 92 }, { "clip_ratio/high_max": 0.0026365415324107744, "clip_ratio/high_mean": 0.0011868597866850905, "clip_ratio/low_mean": 0.0010226778304058826, "clip_ratio/low_min": 0.00010679325168894138, "clip_ratio/region_mean": 0.0022095375825301744, "epoch": 0.00867997830005425, "grad_norm": 0.1672240048646927, "learning_rate": 2e-07, "loss": 0.0142, "step": 93 }, { "clip_ratio/high_max": 0.0027250569037278183, "clip_ratio/high_mean": 0.0013217675768828485, "clip_ratio/low_mean": 0.000998929939669324, "clip_ratio/low_min": 6.232082614587853e-05, "clip_ratio/region_mean": 0.0023206975020002574, "epoch": 0.008773311400054834, "grad_norm": 0.1793237030506134, "learning_rate": 2e-07, "loss": -0.0144, "step": 94 }, { "clip_ratio/high_max": 0.0027768419167841785, "clip_ratio/high_mean": 0.0012436797042028047, "clip_ratio/low_mean": 0.0009854092913883505, "clip_ratio/low_min": 0.00013467337521433365, "clip_ratio/region_mean": 0.002229089048341848, "epoch": 0.008866644500055417, "grad_norm": 0.18953968584537506, "learning_rate": 2e-07, "loss": 0.0018, "step": 95 }, { "clip_ratio/high_max": 0.0027178080927114934, "clip_ratio/high_mean": 0.0012212964938953519, "clip_ratio/low_mean": 0.000985382550425129, "clip_ratio/low_min": 0.0001628349718885147, "clip_ratio/region_mean": 0.0022066790479584597, "epoch": 0.008959977600056, "grad_norm": 0.1701854169368744, "learning_rate": 2e-07, "loss": -0.027, "step": 96 }, { "clip_ratio/high_max": 0.0024119935842463747, "clip_ratio/high_mean": 0.0010568742145551369, "clip_ratio/low_mean": 0.0011819798237411305, "clip_ratio/low_min": 0.00012788825915777124, "clip_ratio/region_mean": 0.0022388540382962674, "epoch": 0.009053310700056582, "grad_norm": 0.18675607442855835, "learning_rate": 2e-07, "loss": 0.0293, "step": 97 }, { "clip_ratio/high_max": 0.0035491284070303664, "clip_ratio/high_mean": 0.0013820905624015722, "clip_ratio/low_mean": 0.0009762960398802534, "clip_ratio/low_min": 0.00016453116586490069, "clip_ratio/region_mean": 0.0023583865622640587, "epoch": 0.009146643800057166, "grad_norm": 0.16908271610736847, "learning_rate": 2e-07, "loss": -0.0105, "step": 98 }, { "clip_ratio/high_max": 0.00247835525078699, "clip_ratio/high_mean": 0.0011156640684930608, "clip_ratio/low_mean": 0.001052439238264924, "clip_ratio/low_min": 8.926506234274711e-05, "clip_ratio/region_mean": 0.0021681033103959635, "epoch": 0.00923997690005775, "grad_norm": 0.2073219120502472, "learning_rate": 2e-07, "loss": 0.0423, "step": 99 }, { "clip_ratio/high_max": 0.002534984640078619, "clip_ratio/high_mean": 0.0012455285832402296, "clip_ratio/low_mean": 0.0011557196667126846, "clip_ratio/low_min": 0.00010639028550940566, "clip_ratio/region_mean": 0.002401248253590893, "epoch": 0.009333310000058333, "grad_norm": 0.19808122515678406, "learning_rate": 2e-07, "loss": 0.0224, "step": 100 }, { "clip_ratio/high_max": 0.0024680399073986337, "clip_ratio/high_mean": 0.0010831113177118823, "clip_ratio/low_mean": 0.0012592012317327317, "clip_ratio/low_min": 0.00017032107734848978, "clip_ratio/region_mean": 0.00234231253125472, "epoch": 0.009426643100058916, "grad_norm": 0.19438901543617249, "learning_rate": 2e-07, "loss": 0.0343, "step": 101 }, { "clip_ratio/high_max": 0.0026824113374459557, "clip_ratio/high_mean": 0.0011952072600251995, "clip_ratio/low_mean": 0.0012860456881753635, "clip_ratio/low_min": 0.00022265155894274358, "clip_ratio/region_mean": 0.002481252930010669, "epoch": 0.0095199762000595, "grad_norm": 0.21726852655410767, "learning_rate": 2e-07, "loss": 0.0397, "step": 102 }, { "clip_ratio/high_max": 0.002726063357840758, "clip_ratio/high_mean": 0.0011511658522067592, "clip_ratio/low_mean": 0.0011307314161967952, "clip_ratio/low_min": 0.0001876334790722467, "clip_ratio/region_mean": 0.002281897257489618, "epoch": 0.009613309300060083, "grad_norm": 0.18289121985435486, "learning_rate": 2e-07, "loss": 0.0206, "step": 103 }, { "clip_ratio/high_max": 0.0025665276989457197, "clip_ratio/high_mean": 0.0010960943291138392, "clip_ratio/low_mean": 0.0011031528047169559, "clip_ratio/low_min": 0.0001309262715949444, "clip_ratio/region_mean": 0.0021992471301928163, "epoch": 0.009706642400060667, "grad_norm": 0.18796053528785706, "learning_rate": 2e-07, "loss": 0.0077, "step": 104 }, { "clip_ratio/high_max": 0.002689894405193627, "clip_ratio/high_mean": 0.001097467466024682, "clip_ratio/low_mean": 0.001141650674981065, "clip_ratio/low_min": 0.00011748064662242541, "clip_ratio/region_mean": 0.002239118199213408, "epoch": 0.00979997550006125, "grad_norm": 0.18031446635723114, "learning_rate": 2e-07, "loss": 0.0174, "step": 105 }, { "clip_ratio/high_max": 0.0029994639844517224, "clip_ratio/high_mean": 0.0012087695286027156, "clip_ratio/low_mean": 0.0014255132009566296, "clip_ratio/low_min": 0.00014565794845111668, "clip_ratio/region_mean": 0.002634282733197324, "epoch": 0.009893308600061834, "grad_norm": 0.2437736988067627, "learning_rate": 2e-07, "loss": 0.0285, "step": 106 }, { "clip_ratio/high_max": 0.002619189559482038, "clip_ratio/high_mean": 0.0011916592484340072, "clip_ratio/low_mean": 0.0012260313760634745, "clip_ratio/low_min": 0.00012317240907577798, "clip_ratio/region_mean": 0.002417690571746789, "epoch": 0.009986641700062417, "grad_norm": 0.19500799477100372, "learning_rate": 2e-07, "loss": 0.0023, "step": 107 }, { "clip_ratio/high_max": 0.002805651289236266, "clip_ratio/high_mean": 0.0012430563256202731, "clip_ratio/low_mean": 0.001070772581442725, "clip_ratio/low_min": 0.00013049281460553175, "clip_ratio/region_mean": 0.0023138288815971464, "epoch": 0.010079974800063, "grad_norm": 0.197968527674675, "learning_rate": 2e-07, "loss": 0.0135, "step": 108 }, { "clip_ratio/high_max": 0.002827042633725796, "clip_ratio/high_mean": 0.001283066547330236, "clip_ratio/low_mean": 0.001106328918467625, "clip_ratio/low_min": 0.00013226754708739463, "clip_ratio/region_mean": 0.0023893954712548293, "epoch": 0.010173307900063584, "grad_norm": 0.1845165491104126, "learning_rate": 2e-07, "loss": -0.023, "step": 109 }, { "clip_ratio/high_max": 0.0026356971357017756, "clip_ratio/high_mean": 0.0012250515246705618, "clip_ratio/low_mean": 0.0012772790651069954, "clip_ratio/low_min": 0.00022481046380562475, "clip_ratio/region_mean": 0.0025023304842761718, "epoch": 0.010266641000064166, "grad_norm": 0.2182450294494629, "learning_rate": 2e-07, "loss": 0.0135, "step": 110 }, { "clip_ratio/high_max": 0.003072882151172962, "clip_ratio/high_mean": 0.001378587996441638, "clip_ratio/low_mean": 0.0012424594551703194, "clip_ratio/low_min": 0.00019686294808707316, "clip_ratio/region_mean": 0.002621047489810735, "epoch": 0.010359974100064749, "grad_norm": 0.19440597295761108, "learning_rate": 2e-07, "loss": -0.0176, "step": 111 }, { "clip_ratio/high_max": 0.002709905631490983, "clip_ratio/high_mean": 0.0011709596219589002, "clip_ratio/low_mean": 0.0012642902474908624, "clip_ratio/low_min": 0.00017267084967897972, "clip_ratio/region_mean": 0.0024352498949156143, "epoch": 0.010453307200065333, "grad_norm": 0.18936507403850555, "learning_rate": 2e-07, "loss": 0.0282, "step": 112 }, { "clip_ratio/high_max": 0.002843185699020978, "clip_ratio/high_mean": 0.0012608690267370548, "clip_ratio/low_mean": 0.0013796827042824589, "clip_ratio/low_min": 0.0002849881884685601, "clip_ratio/region_mean": 0.0026405517710372806, "epoch": 0.010546640300065916, "grad_norm": 0.2426442950963974, "learning_rate": 2e-07, "loss": 0.0825, "step": 113 }, { "clip_ratio/high_max": 0.002851191529771313, "clip_ratio/high_mean": 0.0014006964629516006, "clip_ratio/low_mean": 0.0012772772097378038, "clip_ratio/low_min": 0.0002610153442219598, "clip_ratio/region_mean": 0.0026779736508615315, "epoch": 0.0106399734000665, "grad_norm": 0.24099399149417877, "learning_rate": 2e-07, "loss": 0.0139, "step": 114 }, { "clip_ratio/high_max": 0.0023237135137605947, "clip_ratio/high_mean": 0.0010651344891812187, "clip_ratio/low_mean": 0.0012888269666291308, "clip_ratio/low_min": 0.00019807125499937683, "clip_ratio/region_mean": 0.0023539614558103494, "epoch": 0.010733306500067083, "grad_norm": 0.18726374208927155, "learning_rate": 2e-07, "loss": 0.0387, "step": 115 }, { "clip_ratio/high_max": 0.003235339478123933, "clip_ratio/high_mean": 0.0015591698756907135, "clip_ratio/low_mean": 0.0009970408154913457, "clip_ratio/low_min": 8.346320646523964e-05, "clip_ratio/region_mean": 0.002556210725742858, "epoch": 0.010826639600067666, "grad_norm": 0.16004793345928192, "learning_rate": 2e-07, "loss": -0.0522, "step": 116 }, { "clip_ratio/high_max": 0.0033125573536381125, "clip_ratio/high_mean": 0.0014769402769161388, "clip_ratio/low_mean": 0.0013356961062527262, "clip_ratio/low_min": 0.00014130645104160067, "clip_ratio/region_mean": 0.0028126363977207802, "epoch": 0.01091997270006825, "grad_norm": 0.24664698541164398, "learning_rate": 2e-07, "loss": 0.0161, "step": 117 }, { "clip_ratio/high_max": 0.0027916423860006034, "clip_ratio/high_mean": 0.001320090850640554, "clip_ratio/low_mean": 0.0012178942088212352, "clip_ratio/low_min": 0.00011255316167080309, "clip_ratio/region_mean": 0.0025379850339959376, "epoch": 0.011013305800068833, "grad_norm": 0.23122738301753998, "learning_rate": 2e-07, "loss": 0.0094, "step": 118 }, { "clip_ratio/high_max": 0.003184327499184292, "clip_ratio/high_mean": 0.0014856331144983415, "clip_ratio/low_mean": 0.0011788228257501032, "clip_ratio/low_min": 0.00020759128165082075, "clip_ratio/region_mean": 0.0026644559839041904, "epoch": 0.011106638900069417, "grad_norm": 0.1840520203113556, "learning_rate": 2e-07, "loss": -0.0093, "step": 119 }, { "clip_ratio/high_max": 0.0029894941035308875, "clip_ratio/high_mean": 0.0013333193819562439, "clip_ratio/low_mean": 0.0012309773919696454, "clip_ratio/low_min": 0.00013832677723257802, "clip_ratio/region_mean": 0.0025642967157182284, "epoch": 0.01119997200007, "grad_norm": 0.19213904440402985, "learning_rate": 2e-07, "loss": -0.0044, "step": 120 }, { "clip_ratio/high_max": 0.002906564681325108, "clip_ratio/high_mean": 0.0013738416928390507, "clip_ratio/low_mean": 0.0012525788151833694, "clip_ratio/low_min": 0.0001482012066844618, "clip_ratio/region_mean": 0.0026264205807819963, "epoch": 0.011293305100070584, "grad_norm": 0.2320069968700409, "learning_rate": 2e-07, "loss": 0.0125, "step": 121 }, { "clip_ratio/high_max": 0.0030602143888245337, "clip_ratio/high_mean": 0.0014149211710901, "clip_ratio/low_mean": 0.0012173053855804028, "clip_ratio/low_min": 0.0001835508919612039, "clip_ratio/region_mean": 0.0026322265912313014, "epoch": 0.011386638200071167, "grad_norm": 0.21284720301628113, "learning_rate": 2e-07, "loss": -0.0294, "step": 122 }, { "clip_ratio/high_max": 0.0031788112537469715, "clip_ratio/high_mean": 0.0014349647535709664, "clip_ratio/low_mean": 0.0012942128341819625, "clip_ratio/low_min": 0.000158605927026656, "clip_ratio/region_mean": 0.0027291775913909078, "epoch": 0.011479971300071749, "grad_norm": 0.2327825278043747, "learning_rate": 2e-07, "loss": 0.0084, "step": 123 }, { "clip_ratio/high_max": 0.003305037462268956, "clip_ratio/high_mean": 0.0013898545439587906, "clip_ratio/low_mean": 0.0014036611064511817, "clip_ratio/low_min": 0.00019112394238618435, "clip_ratio/region_mean": 0.0027935156322200783, "epoch": 0.011573304400072332, "grad_norm": 0.2331952601671219, "learning_rate": 2e-07, "loss": 0.0311, "step": 124 }, { "clip_ratio/high_max": 0.003134739337838255, "clip_ratio/high_mean": 0.0013305547217896674, "clip_ratio/low_mean": 0.0013800325832562521, "clip_ratio/low_min": 0.0002458781782479491, "clip_ratio/region_mean": 0.0027105873377877288, "epoch": 0.011666637500072916, "grad_norm": 0.17725017666816711, "learning_rate": 2e-07, "loss": 0.011, "step": 125 }, { "clip_ratio/high_max": 0.00263468536286382, "clip_ratio/high_mean": 0.0011493472047732212, "clip_ratio/low_mean": 0.0013665114602190442, "clip_ratio/low_min": 0.00023296094877878204, "clip_ratio/region_mean": 0.0025158586868201382, "epoch": 0.0117599706000735, "grad_norm": 0.21296899020671844, "learning_rate": 2e-07, "loss": -0.0004, "step": 126 }, { "clip_ratio/high_max": 0.002760658295301255, "clip_ratio/high_mean": 0.0012749816851282958, "clip_ratio/low_mean": 0.001276443770620972, "clip_ratio/low_min": 0.00023521079128840938, "clip_ratio/region_mean": 0.002551425430283416, "epoch": 0.011853303700074083, "grad_norm": 0.17043305933475494, "learning_rate": 2e-07, "loss": 0.0375, "step": 127 }, { "clip_ratio/high_max": 0.0031045092109707184, "clip_ratio/high_mean": 0.0014013611144036986, "clip_ratio/low_mean": 0.001528628639789531, "clip_ratio/low_min": 0.0002908705291702063, "clip_ratio/region_mean": 0.002929989743279293, "epoch": 0.011946636800074666, "grad_norm": 0.22638551890850067, "learning_rate": 2e-07, "loss": -0.0057, "step": 128 }, { "clip_ratio/high_max": 0.00232501098071225, "clip_ratio/high_mean": 0.0009063930956472177, "clip_ratio/low_mean": 0.0006178411276778206, "clip_ratio/low_min": 4.4985728891333565e-05, "clip_ratio/region_mean": 0.0015242342196870595, "completions/clipped_ratio": 0.014003208705357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 612.0515747070312, "completions/mean_terminated_length": 562.572265625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.01203996990007525, "grad_norm": 0.11548373103141785, "learning_rate": 2e-07, "loss": -0.0007, "num_tokens": 162088634.0, "reward": 0.557320773601532, "reward_std": 0.20648711919784546, "rewards/simpleverify_reward/mean": 0.5573207139968872, "rewards/simpleverify_reward/std": 0.4967055320739746, "step": 129 }, { "clip_ratio/high_max": 0.0018056633925880305, "clip_ratio/high_mean": 0.0007321788743865909, "clip_ratio/low_mean": 0.0005952819738013204, "clip_ratio/low_min": 2.6674157197703607e-05, "clip_ratio/region_mean": 0.00132746088274871, "epoch": 0.012133303000075833, "grad_norm": 0.10720103979110718, "learning_rate": 2e-07, "loss": 0.0511, "step": 130 }, { "clip_ratio/high_max": 0.001899360511742998, "clip_ratio/high_mean": 0.000749359508517955, "clip_ratio/low_mean": 0.0005399876754381694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012893471721326932, "epoch": 0.012226636100076417, "grad_norm": 0.11088676750659943, "learning_rate": 2e-07, "loss": 0.0457, "step": 131 }, { "clip_ratio/high_max": 0.0019340096077939961, "clip_ratio/high_mean": 0.0008524430086254142, "clip_ratio/low_mean": 0.0005190552565181861, "clip_ratio/low_min": 5.66173875995446e-05, "clip_ratio/region_mean": 0.0013714982669625897, "epoch": 0.012319969200077, "grad_norm": 0.12459676712751389, "learning_rate": 2e-07, "loss": -0.0413, "step": 132 }, { "clip_ratio/high_max": 0.0020766235393239185, "clip_ratio/high_mean": 0.0008440274596068775, "clip_ratio/low_mean": 0.0005440281615847198, "clip_ratio/low_min": 2.679387034731917e-05, "clip_ratio/region_mean": 0.0013880556398362387, "epoch": 0.012413302300077584, "grad_norm": 0.1213875338435173, "learning_rate": 2e-07, "loss": 0.016, "step": 133 }, { "clip_ratio/high_max": 0.0019092861621174961, "clip_ratio/high_mean": 0.0008111224851745646, "clip_ratio/low_mean": 0.0006635082054344821, "clip_ratio/low_min": 5.902294378756778e-05, "clip_ratio/region_mean": 0.0014746306696906686, "epoch": 0.012506635400078167, "grad_norm": 0.12240087240934372, "learning_rate": 2e-07, "loss": 0.0521, "step": 134 }, { "clip_ratio/high_max": 0.0022147839845274575, "clip_ratio/high_mean": 0.0009147003984253388, "clip_ratio/low_mean": 0.0005792223582830047, "clip_ratio/low_min": 3.486750210868195e-05, "clip_ratio/region_mean": 0.0014939227621653117, "epoch": 0.01259996850007875, "grad_norm": 0.12430458515882492, "learning_rate": 2e-07, "loss": 0.0088, "step": 135 }, { "clip_ratio/high_max": 0.0020741950575029477, "clip_ratio/high_mean": 0.0008437033011432504, "clip_ratio/low_mean": 0.0006329828993330011, "clip_ratio/low_min": 7.527784418925876e-05, "clip_ratio/region_mean": 0.0014766861750104, "epoch": 0.012693301600079332, "grad_norm": 0.1303962767124176, "learning_rate": 2e-07, "loss": 0.0376, "step": 136 }, { "clip_ratio/high_max": 0.002193069230997935, "clip_ratio/high_mean": 0.0008065390229603508, "clip_ratio/low_mean": 0.000517238368956896, "clip_ratio/low_min": 4.489570528676268e-05, "clip_ratio/region_mean": 0.0013237773891887628, "epoch": 0.012786634700079916, "grad_norm": 0.11874948441982269, "learning_rate": 2e-07, "loss": 0.0032, "step": 137 }, { "clip_ratio/high_max": 0.0021708905915147625, "clip_ratio/high_mean": 0.0007834745156287681, "clip_ratio/low_mean": 0.0006067644280847162, "clip_ratio/low_min": 7.887519996074843e-05, "clip_ratio/region_mean": 0.001390238932799548, "epoch": 0.0128799678000805, "grad_norm": 0.12342286109924316, "learning_rate": 2e-07, "loss": 0.0361, "step": 138 }, { "clip_ratio/high_max": 0.0021549754237639718, "clip_ratio/high_mean": 0.000825220446131425, "clip_ratio/low_mean": 0.0006075554483686574, "clip_ratio/low_min": 3.948605626646895e-05, "clip_ratio/region_mean": 0.0014327758763101883, "epoch": 0.012973300900081083, "grad_norm": 0.11775846779346466, "learning_rate": 2e-07, "loss": 0.023, "step": 139 }, { "clip_ratio/high_max": 0.00231831432756735, "clip_ratio/high_mean": 0.0009050001281138975, "clip_ratio/low_mean": 0.0006249633875086147, "clip_ratio/low_min": 3.1702074920758605e-05, "clip_ratio/region_mean": 0.0015299634833354503, "epoch": 0.013066634000081666, "grad_norm": 0.12300993502140045, "learning_rate": 2e-07, "loss": 0.0158, "step": 140 }, { "clip_ratio/high_max": 0.002264536487928126, "clip_ratio/high_mean": 0.0009316449559264584, "clip_ratio/low_mean": 0.0005919352079217788, "clip_ratio/low_min": 5.5487436839030124e-05, "clip_ratio/region_mean": 0.001523580176581163, "epoch": 0.01315996710008225, "grad_norm": 0.1228451356291771, "learning_rate": 2e-07, "loss": -0.0034, "step": 141 }, { "clip_ratio/high_max": 0.001788812551239971, "clip_ratio/high_mean": 0.000803649241788662, "clip_ratio/low_mean": 0.0005823452993354294, "clip_ratio/low_min": 3.0763343602302484e-05, "clip_ratio/region_mean": 0.0013859945756848902, "epoch": 0.013253300200082833, "grad_norm": 0.13015413284301758, "learning_rate": 2e-07, "loss": 0.0245, "step": 142 }, { "clip_ratio/high_max": 0.0020430958247743547, "clip_ratio/high_mean": 0.0008549613594368566, "clip_ratio/low_mean": 0.000667634887577151, "clip_ratio/low_min": 6.104574094933923e-05, "clip_ratio/region_mean": 0.0015225962197291665, "epoch": 0.013346633300083417, "grad_norm": 0.1261974275112152, "learning_rate": 2e-07, "loss": 0.0207, "step": 143 }, { "clip_ratio/high_max": 0.00218324024535832, "clip_ratio/high_mean": 0.0009062221979547758, "clip_ratio/low_mean": 0.0006309419629815238, "clip_ratio/low_min": 3.0008367502887268e-05, "clip_ratio/region_mean": 0.0015371641857200302, "epoch": 0.013439966400084, "grad_norm": 0.13090796768665314, "learning_rate": 2e-07, "loss": -0.0454, "step": 144 }, { "clip_ratio/high_max": 0.0018428340117679909, "clip_ratio/high_mean": 0.0008301965517603094, "clip_ratio/low_mean": 0.0006295781695371261, "clip_ratio/low_min": 7.841734441171866e-05, "clip_ratio/region_mean": 0.0014597747212974355, "epoch": 0.013533299500084583, "grad_norm": 0.12244794517755508, "learning_rate": 2e-07, "loss": 0.0488, "step": 145 }, { "clip_ratio/high_max": 0.0023128381071728654, "clip_ratio/high_mean": 0.0009522552863927558, "clip_ratio/low_mean": 0.0005213593412918271, "clip_ratio/low_min": 4.9851912990561686e-05, "clip_ratio/region_mean": 0.0014736146149516571, "epoch": 0.013626632600085167, "grad_norm": 0.12503549456596375, "learning_rate": 2e-07, "loss": 0.0071, "step": 146 }, { "clip_ratio/high_max": 0.001932383507664781, "clip_ratio/high_mean": 0.0007960152215673588, "clip_ratio/low_mean": 0.0006826122607890284, "clip_ratio/low_min": 4.7681163778179325e-05, "clip_ratio/region_mean": 0.0014786274623475038, "epoch": 0.01371996570008575, "grad_norm": 0.1202053651213646, "learning_rate": 2e-07, "loss": 0.0391, "step": 147 }, { "clip_ratio/high_max": 0.0017457220310461707, "clip_ratio/high_mean": 0.0007050544427329442, "clip_ratio/low_mean": 0.0005960238204352208, "clip_ratio/low_min": 4.33173481724225e-05, "clip_ratio/region_mean": 0.0013010782822675537, "epoch": 0.013813298800086334, "grad_norm": 0.11331025511026382, "learning_rate": 2e-07, "loss": 0.0495, "step": 148 }, { "clip_ratio/high_max": 0.0022634210690739565, "clip_ratio/high_mean": 0.0008236529283749405, "clip_ratio/low_mean": 0.0006043082776159281, "clip_ratio/low_min": 3.8978926568233874e-05, "clip_ratio/region_mean": 0.0014279611787060276, "epoch": 0.013906631900086916, "grad_norm": 0.12493271380662918, "learning_rate": 2e-07, "loss": 0.0366, "step": 149 }, { "clip_ratio/high_max": 0.001948405279108556, "clip_ratio/high_mean": 0.0008463402809866238, "clip_ratio/low_mean": 0.0006194850448082434, "clip_ratio/low_min": 4.4843030991614796e-05, "clip_ratio/region_mean": 0.0014658253494417295, "epoch": 0.013999965000087499, "grad_norm": 0.11433596163988113, "learning_rate": 2e-07, "loss": 0.0382, "step": 150 }, { "clip_ratio/high_max": 0.001643922398216091, "clip_ratio/high_mean": 0.0007427159853250487, "clip_ratio/low_mean": 0.00062657557282364, "clip_ratio/low_min": 3.7957225686113816e-05, "clip_ratio/region_mean": 0.0013692915454157628, "epoch": 0.014093298100088083, "grad_norm": 0.1152142584323883, "learning_rate": 2e-07, "loss": 0.0254, "step": 151 }, { "clip_ratio/high_max": 0.002095764662954025, "clip_ratio/high_mean": 0.0008691585862834472, "clip_ratio/low_mean": 0.0004901157299173065, "clip_ratio/low_min": 2.459009465383133e-05, "clip_ratio/region_mean": 0.0013592743343906477, "epoch": 0.014186631200088666, "grad_norm": 0.12261440604925156, "learning_rate": 2e-07, "loss": 0.0003, "step": 152 }, { "clip_ratio/high_max": 0.001958136970642954, "clip_ratio/high_mean": 0.0007773633205943042, "clip_ratio/low_mean": 0.0006693352206639247, "clip_ratio/low_min": 3.299753188912291e-05, "clip_ratio/region_mean": 0.0014466985558101442, "epoch": 0.01427996430008925, "grad_norm": 0.12206938117742538, "learning_rate": 2e-07, "loss": 0.0449, "step": 153 }, { "clip_ratio/high_max": 0.0017215214429597836, "clip_ratio/high_mean": 0.0006703513117827242, "clip_ratio/low_mean": 0.0006140235982456943, "clip_ratio/low_min": 7.891191398812225e-05, "clip_ratio/region_mean": 0.001284374902752461, "epoch": 0.014373297400089833, "grad_norm": 0.12928135693073273, "learning_rate": 2e-07, "loss": 0.0666, "step": 154 }, { "clip_ratio/high_max": 0.0025190896849380806, "clip_ratio/high_mean": 0.0010315882609575056, "clip_ratio/low_mean": 0.000549425041754148, "clip_ratio/low_min": 4.4392332711140625e-05, "clip_ratio/region_mean": 0.00158101328270277, "epoch": 0.014466630500090416, "grad_norm": 0.11839871853590012, "learning_rate": 2e-07, "loss": 0.0097, "step": 155 }, { "clip_ratio/high_max": 0.002192861051298678, "clip_ratio/high_mean": 0.0007855405292502837, "clip_ratio/low_mean": 0.0005968113728158642, "clip_ratio/low_min": 2.736394071689574e-05, "clip_ratio/region_mean": 0.001382351911161095, "epoch": 0.014559963600091, "grad_norm": 0.12960529327392578, "learning_rate": 2e-07, "loss": 0.0402, "step": 156 }, { "clip_ratio/high_max": 0.0022708648903062567, "clip_ratio/high_mean": 0.0008667979527672287, "clip_ratio/low_mean": 0.0005383280395108159, "clip_ratio/low_min": 2.439804211462615e-05, "clip_ratio/region_mean": 0.001405126036843285, "epoch": 0.014653296700091583, "grad_norm": 0.11870219558477402, "learning_rate": 2e-07, "loss": -0.012, "step": 157 }, { "clip_ratio/high_max": 0.001813713042793097, "clip_ratio/high_mean": 0.0007951536545078852, "clip_ratio/low_mean": 0.0005980955738777993, "clip_ratio/low_min": 3.533307335601421e-05, "clip_ratio/region_mean": 0.001393249214743264, "epoch": 0.014746629800092167, "grad_norm": 0.17155663669109344, "learning_rate": 2e-07, "loss": 0.0321, "step": 158 }, { "clip_ratio/high_max": 0.0020153238510829397, "clip_ratio/high_mean": 0.0009111322378885234, "clip_ratio/low_mean": 0.0006379829883371713, "clip_ratio/low_min": 8.379660630453145e-05, "clip_ratio/region_mean": 0.0015491152043978218, "epoch": 0.01483996290009275, "grad_norm": 0.12098103761672974, "learning_rate": 2e-07, "loss": 0.0349, "step": 159 }, { "clip_ratio/high_max": 0.002073839037620928, "clip_ratio/high_mean": 0.0007979708934726659, "clip_ratio/low_mean": 0.00058883255951514, "clip_ratio/low_min": 2.1562566871580202e-05, "clip_ratio/region_mean": 0.0013868034657207318, "epoch": 0.014933296000093334, "grad_norm": 0.12484041601419449, "learning_rate": 2e-07, "loss": 0.0343, "step": 160 }, { "clip_ratio/high_max": 0.0020564257065416314, "clip_ratio/high_mean": 0.0007869898327044211, "clip_ratio/low_mean": 0.0006445478393288795, "clip_ratio/low_min": 5.639357368636411e-05, "clip_ratio/region_mean": 0.0014315376720333006, "epoch": 0.015026629100093917, "grad_norm": 0.12660476565361023, "learning_rate": 2e-07, "loss": -0.0019, "step": 161 }, { "clip_ratio/high_max": 0.001999727690417785, "clip_ratio/high_mean": 0.000846094626467675, "clip_ratio/low_mean": 0.0005975343974569114, "clip_ratio/low_min": 4.258763010511757e-05, "clip_ratio/region_mean": 0.0014436290512094274, "epoch": 0.015119962200094499, "grad_norm": 0.13701540231704712, "learning_rate": 2e-07, "loss": 0.0211, "step": 162 }, { "clip_ratio/high_max": 0.0019190769125998486, "clip_ratio/high_mean": 0.000797158378190943, "clip_ratio/low_mean": 0.0005433618653114536, "clip_ratio/low_min": 2.7918146315641934e-05, "clip_ratio/region_mean": 0.001340520229859976, "epoch": 0.015213295300095082, "grad_norm": 0.11606060713529587, "learning_rate": 2e-07, "loss": 0.0183, "step": 163 }, { "clip_ratio/high_max": 0.002203612013545353, "clip_ratio/high_mean": 0.0008608292901044479, "clip_ratio/low_mean": 0.0006327167684503365, "clip_ratio/low_min": 2.1316264792403672e-05, "clip_ratio/region_mean": 0.0014935460130800493, "epoch": 0.015306628400095666, "grad_norm": 0.11794206500053406, "learning_rate": 2e-07, "loss": 0.0264, "step": 164 }, { "clip_ratio/high_max": 0.0024271376241813414, "clip_ratio/high_mean": 0.0008782749355304986, "clip_ratio/low_mean": 0.0006297012096183607, "clip_ratio/low_min": 6.613056939386297e-06, "clip_ratio/region_mean": 0.0015079761542438064, "epoch": 0.01539996150009625, "grad_norm": 0.11385703831911087, "learning_rate": 2e-07, "loss": -0.0237, "step": 165 }, { "clip_ratio/high_max": 0.001960954876267351, "clip_ratio/high_mean": 0.0008519470029568765, "clip_ratio/low_mean": 0.0005204352673899848, "clip_ratio/low_min": 4.249007997714216e-05, "clip_ratio/region_mean": 0.0013723822885367554, "epoch": 0.015493294600096833, "grad_norm": 0.1304963231086731, "learning_rate": 2e-07, "loss": -0.0087, "step": 166 }, { "clip_ratio/high_max": 0.002020803585764952, "clip_ratio/high_mean": 0.00089911196664616, "clip_ratio/low_mean": 0.0005822664370498387, "clip_ratio/low_min": 3.342315949339536e-05, "clip_ratio/region_mean": 0.001481378436437808, "epoch": 0.015586627700097416, "grad_norm": 0.12180845439434052, "learning_rate": 2e-07, "loss": 0.0062, "step": 167 }, { "clip_ratio/high_max": 0.002204894812166458, "clip_ratio/high_mean": 0.0008095046287053265, "clip_ratio/low_mean": 0.0005879498394278926, "clip_ratio/low_min": 4.449742209544638e-05, "clip_ratio/region_mean": 0.0013974544708617032, "epoch": 0.015679960800098, "grad_norm": 0.1236138641834259, "learning_rate": 2e-07, "loss": -0.0035, "step": 168 }, { "clip_ratio/high_max": 0.002182902149797883, "clip_ratio/high_mean": 0.0009125107626459794, "clip_ratio/low_mean": 0.0005880029348190874, "clip_ratio/low_min": 8.073392109508859e-05, "clip_ratio/region_mean": 0.0015005137320258655, "epoch": 0.015773293900098583, "grad_norm": 0.11757367104291916, "learning_rate": 2e-07, "loss": -0.0114, "step": 169 }, { "clip_ratio/high_max": 0.0018806407606462017, "clip_ratio/high_mean": 0.0008008478471310809, "clip_ratio/low_mean": 0.0005428344657048001, "clip_ratio/low_min": 4.654940858017653e-05, "clip_ratio/region_mean": 0.0013436823355732486, "epoch": 0.015866627000099165, "grad_norm": 0.12251515686511993, "learning_rate": 2e-07, "loss": 0.0393, "step": 170 }, { "clip_ratio/high_max": 0.002171648222429212, "clip_ratio/high_mean": 0.0009175659051834373, "clip_ratio/low_mean": 0.0007178434643719811, "clip_ratio/low_min": 4.9187318836629856e-05, "clip_ratio/region_mean": 0.0016354093968402594, "epoch": 0.01595996010009975, "grad_norm": 0.12177026271820068, "learning_rate": 2e-07, "loss": 0.043, "step": 171 }, { "clip_ratio/high_max": 0.002654982323292643, "clip_ratio/high_mean": 0.0009469276774325408, "clip_ratio/low_mean": 0.0006065974885132164, "clip_ratio/low_min": 3.3059307952498784e-05, "clip_ratio/region_mean": 0.0015535251477558631, "epoch": 0.016053293200100332, "grad_norm": 0.13682252168655396, "learning_rate": 2e-07, "loss": 0.0542, "step": 172 }, { "clip_ratio/high_max": 0.0023487524631491397, "clip_ratio/high_mean": 0.0009592491569492267, "clip_ratio/low_mean": 0.000556267376850883, "clip_ratio/low_min": 1.2698090358753689e-05, "clip_ratio/region_mean": 0.0015155165310716256, "epoch": 0.016146626300100917, "grad_norm": 0.12156112492084503, "learning_rate": 2e-07, "loss": 0.024, "step": 173 }, { "clip_ratio/high_max": 0.0018505962652852759, "clip_ratio/high_mean": 0.0008832064231683034, "clip_ratio/low_mean": 0.0005731603655476647, "clip_ratio/low_min": 6.192695309437113e-05, "clip_ratio/region_mean": 0.0014563667537004221, "epoch": 0.0162399594001015, "grad_norm": 0.12489704042673111, "learning_rate": 2e-07, "loss": 0.0087, "step": 174 }, { "clip_ratio/high_max": 0.002171798922063317, "clip_ratio/high_mean": 0.0008508539922331693, "clip_ratio/low_mean": 0.0006780292915209429, "clip_ratio/low_min": 4.7501392145932186e-05, "clip_ratio/region_mean": 0.001528883287392091, "epoch": 0.016333292500102084, "grad_norm": 0.13400530815124512, "learning_rate": 2e-07, "loss": 0.0532, "step": 175 }, { "clip_ratio/high_max": 0.002305213231011294, "clip_ratio/high_mean": 0.0008562942002754426, "clip_ratio/low_mean": 0.0005815803388031782, "clip_ratio/low_min": 6.998276694503147e-05, "clip_ratio/region_mean": 0.0014378745509020519, "epoch": 0.016426625600102666, "grad_norm": 0.11236870288848877, "learning_rate": 2e-07, "loss": 0.043, "step": 176 }, { "clip_ratio/high_max": 0.0018417850660625845, "clip_ratio/high_mean": 0.000804103725386085, "clip_ratio/low_mean": 0.0007310412802326027, "clip_ratio/low_min": 3.313874276500428e-05, "clip_ratio/region_mean": 0.0015351450128946453, "epoch": 0.01651995870010325, "grad_norm": 0.12231137603521347, "learning_rate": 2e-07, "loss": 0.0236, "step": 177 }, { "clip_ratio/high_max": 0.002042430314759258, "clip_ratio/high_mean": 0.0009092611999221845, "clip_ratio/low_mean": 0.000634656498732511, "clip_ratio/low_min": 6.356720950861927e-05, "clip_ratio/region_mean": 0.0015439177223015577, "epoch": 0.016613291800103833, "grad_norm": 0.1261111944913864, "learning_rate": 2e-07, "loss": 0.0479, "step": 178 }, { "clip_ratio/high_max": 0.002036279100138927, "clip_ratio/high_mean": 0.0008984946543932892, "clip_ratio/low_mean": 0.0005924105644226074, "clip_ratio/low_min": 5.5581668675586116e-05, "clip_ratio/region_mean": 0.0014909052333678119, "epoch": 0.016706624900104418, "grad_norm": 0.11070719361305237, "learning_rate": 2e-07, "loss": 0.0142, "step": 179 }, { "clip_ratio/high_max": 0.0020525919608189724, "clip_ratio/high_mean": 0.0008605773182353005, "clip_ratio/low_mean": 0.0006487906503025442, "clip_ratio/low_min": 2.7856665838044137e-05, "clip_ratio/region_mean": 0.0015093679394340143, "epoch": 0.016799958000105, "grad_norm": 0.12303735315799713, "learning_rate": 2e-07, "loss": 0.0351, "step": 180 }, { "clip_ratio/high_max": 0.002129068205249496, "clip_ratio/high_mean": 0.0009720463349367492, "clip_ratio/low_mean": 0.0005559332394113881, "clip_ratio/low_min": 5.655848144670017e-05, "clip_ratio/region_mean": 0.001527979602542473, "epoch": 0.016893291100105585, "grad_norm": 0.11540050804615021, "learning_rate": 2e-07, "loss": -0.0366, "step": 181 }, { "clip_ratio/high_max": 0.0020038939765072428, "clip_ratio/high_mean": 0.0007371270830844878, "clip_ratio/low_mean": 0.0005834991679876111, "clip_ratio/low_min": 1.9800412701442838e-05, "clip_ratio/region_mean": 0.0013206262374296784, "epoch": 0.016986624200106167, "grad_norm": 0.1305420696735382, "learning_rate": 2e-07, "loss": 0.0584, "step": 182 }, { "clip_ratio/high_max": 0.0018414657206449192, "clip_ratio/high_mean": 0.0008148505439748988, "clip_ratio/low_mean": 0.0006569972038050764, "clip_ratio/low_min": 5.621243781206431e-05, "clip_ratio/region_mean": 0.0014718477468704805, "epoch": 0.017079957300106748, "grad_norm": 0.1262456476688385, "learning_rate": 2e-07, "loss": 0.0285, "step": 183 }, { "clip_ratio/high_max": 0.0019878515377058648, "clip_ratio/high_mean": 0.0008784572219155962, "clip_ratio/low_mean": 0.0006598397776542697, "clip_ratio/low_min": 3.8797970773885027e-05, "clip_ratio/region_mean": 0.001538297008664813, "epoch": 0.017173290400107333, "grad_norm": 0.13227629661560059, "learning_rate": 2e-07, "loss": 0.0264, "step": 184 }, { "clip_ratio/high_max": 0.0021797280860482715, "clip_ratio/high_mean": 0.0008352215572813293, "clip_ratio/low_mean": 0.0006388653982867254, "clip_ratio/low_min": 7.673253639950417e-05, "clip_ratio/region_mean": 0.001474086948292097, "epoch": 0.017266623500107915, "grad_norm": 0.12080742418766022, "learning_rate": 2e-07, "loss": 0.0371, "step": 185 }, { "clip_ratio/high_max": 0.0018475940742064267, "clip_ratio/high_mean": 0.0007771433411107864, "clip_ratio/low_mean": 0.0007134704446798423, "clip_ratio/low_min": 0.00010657935035851551, "clip_ratio/region_mean": 0.001490613791247597, "epoch": 0.0173599566001085, "grad_norm": 0.12375640124082565, "learning_rate": 2e-07, "loss": 0.0561, "step": 186 }, { "clip_ratio/high_max": 0.0020230036607244983, "clip_ratio/high_mean": 0.0009091730371437734, "clip_ratio/low_mean": 0.0006899904965393944, "clip_ratio/low_min": 2.9107782211212907e-05, "clip_ratio/region_mean": 0.0015991635445971042, "epoch": 0.017453289700109082, "grad_norm": 0.12571005523204803, "learning_rate": 2e-07, "loss": 0.0092, "step": 187 }, { "clip_ratio/high_max": 0.002062592528091045, "clip_ratio/high_mean": 0.0008402757284784457, "clip_ratio/low_mean": 0.0005597691742877942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014000449009472504, "epoch": 0.017546622800109667, "grad_norm": 0.11693722754716873, "learning_rate": 2e-07, "loss": 0.0278, "step": 188 }, { "clip_ratio/high_max": 0.0019465201330604032, "clip_ratio/high_mean": 0.0009197594208671944, "clip_ratio/low_mean": 0.0006881533590785693, "clip_ratio/low_min": 9.600930297892774e-05, "clip_ratio/region_mean": 0.0016079127744887955, "epoch": 0.01763995590011025, "grad_norm": 0.12828868627548218, "learning_rate": 2e-07, "loss": -0.0006, "step": 189 }, { "clip_ratio/high_max": 0.0020874080291832797, "clip_ratio/high_mean": 0.0007725537889200496, "clip_ratio/low_mean": 0.0005197366672291537, "clip_ratio/low_min": 3.6643272323999554e-05, "clip_ratio/region_mean": 0.001292290435230825, "epoch": 0.017733289000110834, "grad_norm": 0.10893333703279495, "learning_rate": 2e-07, "loss": 0.0169, "step": 190 }, { "clip_ratio/high_max": 0.002281424618558958, "clip_ratio/high_mean": 0.0009387596983287949, "clip_ratio/low_mean": 0.0006478988307208056, "clip_ratio/low_min": 3.0447034077951685e-05, "clip_ratio/region_mean": 0.001586658549058484, "epoch": 0.017826622100111416, "grad_norm": 0.12853072583675385, "learning_rate": 2e-07, "loss": 0.007, "step": 191 }, { "clip_ratio/high_max": 0.0021231984719634056, "clip_ratio/high_mean": 0.000808759628853295, "clip_ratio/low_mean": 0.0007177643765317043, "clip_ratio/low_min": 4.869007261731895e-05, "clip_ratio/region_mean": 0.0015265239671862219, "epoch": 0.017919955200112, "grad_norm": 0.11824563145637512, "learning_rate": 2e-07, "loss": 0.0364, "step": 192 }, { "clip_ratio/high_max": 0.002120645051036263, "clip_ratio/high_mean": 0.0008683730820848723, "clip_ratio/low_mean": 0.0006714710198139073, "clip_ratio/low_min": 5.060790681454819e-05, "clip_ratio/region_mean": 0.0015398441319121048, "epoch": 0.018013288300112583, "grad_norm": 0.12373674660921097, "learning_rate": 2e-07, "loss": 0.0492, "step": 193 }, { "clip_ratio/high_max": 0.001904488555737771, "clip_ratio/high_mean": 0.0008250911832874408, "clip_ratio/low_mean": 0.0007683680996706244, "clip_ratio/low_min": 0.00011069276388298022, "clip_ratio/region_mean": 0.0015934593175188638, "epoch": 0.018106621400113165, "grad_norm": 0.12099580466747284, "learning_rate": 2e-07, "loss": 0.0522, "step": 194 }, { "clip_ratio/high_max": 0.0019143188110319898, "clip_ratio/high_mean": 0.0008691905422892887, "clip_ratio/low_mean": 0.0007370821895165136, "clip_ratio/low_min": 5.501925807038788e-05, "clip_ratio/region_mean": 0.0016062727518146858, "epoch": 0.01819995450011375, "grad_norm": 0.1140805333852768, "learning_rate": 2e-07, "loss": 0.0469, "step": 195 }, { "clip_ratio/high_max": 0.0023856419502408244, "clip_ratio/high_mean": 0.0009381688960274914, "clip_ratio/low_mean": 0.0006135028197604697, "clip_ratio/low_min": 1.7375590687151998e-05, "clip_ratio/region_mean": 0.0015516717030550353, "epoch": 0.01829328760011433, "grad_norm": 0.12470970302820206, "learning_rate": 2e-07, "loss": -0.0014, "step": 196 }, { "clip_ratio/high_max": 0.0019531519392330665, "clip_ratio/high_mean": 0.0008460121443931712, "clip_ratio/low_mean": 0.0005158997146281763, "clip_ratio/low_min": 1.3069845408608671e-05, "clip_ratio/region_mean": 0.001361911854473874, "epoch": 0.018386620700114917, "grad_norm": 0.11249268800020218, "learning_rate": 2e-07, "loss": 0.012, "step": 197 }, { "clip_ratio/high_max": 0.0018231346657557879, "clip_ratio/high_mean": 0.000771814208746946, "clip_ratio/low_mean": 0.0006186212776810862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013904354855185375, "epoch": 0.0184799538001155, "grad_norm": 0.10832629352807999, "learning_rate": 2e-07, "loss": -0.0003, "step": 198 }, { "clip_ratio/high_max": 0.0018588646562420763, "clip_ratio/high_mean": 0.0008382239229831612, "clip_ratio/low_mean": 0.0006132038415671559, "clip_ratio/low_min": 3.903640526914387e-05, "clip_ratio/region_mean": 0.0014514277936541475, "epoch": 0.018573286900116084, "grad_norm": 0.13379481434822083, "learning_rate": 2e-07, "loss": 0.0241, "step": 199 }, { "clip_ratio/high_max": 0.0018978752123075537, "clip_ratio/high_mean": 0.0008271708757092711, "clip_ratio/low_mean": 0.0005572783848037943, "clip_ratio/low_min": 7.607373572682263e-05, "clip_ratio/region_mean": 0.0013844492386851925, "epoch": 0.018666620000116665, "grad_norm": 0.12732315063476562, "learning_rate": 2e-07, "loss": 0.0319, "step": 200 }, { "clip_ratio/high_max": 0.0018908710262621753, "clip_ratio/high_mean": 0.0008043178959269426, "clip_ratio/low_mean": 0.0006495732359326212, "clip_ratio/low_min": 6.671647952316562e-06, "clip_ratio/region_mean": 0.0014538910872943234, "epoch": 0.01875995310011725, "grad_norm": 0.1388690173625946, "learning_rate": 2e-07, "loss": 0.0744, "step": 201 }, { "clip_ratio/high_max": 0.0022086134558776394, "clip_ratio/high_mean": 0.0010252107313135639, "clip_ratio/low_mean": 0.0006244538890314288, "clip_ratio/low_min": 8.090225765045034e-05, "clip_ratio/region_mean": 0.0016496646276209503, "epoch": 0.018853286200117832, "grad_norm": 0.13050539791584015, "learning_rate": 2e-07, "loss": 0.0184, "step": 202 }, { "clip_ratio/high_max": 0.0024375929788220674, "clip_ratio/high_mean": 0.0008924123594624689, "clip_ratio/low_mean": 0.0006478424302258645, "clip_ratio/low_min": 8.190616108549875e-05, "clip_ratio/region_mean": 0.0015402547833218705, "epoch": 0.018946619300118418, "grad_norm": 0.12808597087860107, "learning_rate": 2e-07, "loss": 0.0373, "step": 203 }, { "clip_ratio/high_max": 0.002246012882096693, "clip_ratio/high_mean": 0.0009331771743745776, "clip_ratio/low_mean": 0.0006590158027393045, "clip_ratio/low_min": 5.869673532288289e-05, "clip_ratio/region_mean": 0.0015921929807518609, "epoch": 0.019039952400119, "grad_norm": 0.12071757018566132, "learning_rate": 2e-07, "loss": 0.0255, "step": 204 }, { "clip_ratio/high_max": 0.0019939406774938107, "clip_ratio/high_mean": 0.000842538730466913, "clip_ratio/low_mean": 0.000595607759350969, "clip_ratio/low_min": 3.1035665415402036e-05, "clip_ratio/region_mean": 0.0014381464861799031, "epoch": 0.019133285500119585, "grad_norm": 0.14199955761432648, "learning_rate": 2e-07, "loss": 0.0055, "step": 205 }, { "clip_ratio/high_max": 0.0021363764099078253, "clip_ratio/high_mean": 0.0007927258275230997, "clip_ratio/low_mean": 0.0005810762522742152, "clip_ratio/low_min": 2.1845507944817655e-05, "clip_ratio/region_mean": 0.0013738020788878202, "epoch": 0.019226618600120166, "grad_norm": 0.12020599842071533, "learning_rate": 2e-07, "loss": -0.0025, "step": 206 }, { "clip_ratio/high_max": 0.0020698091757367365, "clip_ratio/high_mean": 0.0008564557629142655, "clip_ratio/low_mean": 0.0006409840661945054, "clip_ratio/low_min": 6.717782616760815e-05, "clip_ratio/region_mean": 0.0014974398145568557, "epoch": 0.019319951700120748, "grad_norm": 0.11984051764011383, "learning_rate": 2e-07, "loss": 0.0589, "step": 207 }, { "clip_ratio/high_max": 0.0018970263517985586, "clip_ratio/high_mean": 0.0008585091727582039, "clip_ratio/low_mean": 0.0006195463920448674, "clip_ratio/low_min": 7.11634302206221e-05, "clip_ratio/region_mean": 0.0014780555866309442, "epoch": 0.019413284800121333, "grad_norm": 0.11766231060028076, "learning_rate": 2e-07, "loss": 0.0307, "step": 208 }, { "clip_ratio/high_max": 0.0020743853601743467, "clip_ratio/high_mean": 0.000938547178520821, "clip_ratio/low_mean": 0.0005018305428166059, "clip_ratio/low_min": 1.1007397006324027e-05, "clip_ratio/region_mean": 0.0014403777313418686, "epoch": 0.019506617900121915, "grad_norm": 0.17046383023262024, "learning_rate": 2e-07, "loss": -0.0306, "step": 209 }, { "clip_ratio/high_max": 0.0023688607616350055, "clip_ratio/high_mean": 0.0009493983488937374, "clip_ratio/low_mean": 0.0006116517097325413, "clip_ratio/low_min": 7.67191659178934e-05, "clip_ratio/region_mean": 0.0015610500631737523, "epoch": 0.0195999510001225, "grad_norm": 0.12418784201145172, "learning_rate": 2e-07, "loss": 0.004, "step": 210 }, { "clip_ratio/high_max": 0.0020592273795045912, "clip_ratio/high_mean": 0.0008802560405456461, "clip_ratio/low_mean": 0.0005679554888047278, "clip_ratio/low_min": 8.43796442495659e-06, "clip_ratio/region_mean": 0.001448211547540268, "epoch": 0.019693284100123082, "grad_norm": 0.12844562530517578, "learning_rate": 2e-07, "loss": -0.0161, "step": 211 }, { "clip_ratio/high_max": 0.0016921541209740099, "clip_ratio/high_mean": 0.0006915236772329081, "clip_ratio/low_mean": 0.0005233117135503562, "clip_ratio/low_min": 1.605445686436724e-05, "clip_ratio/region_mean": 0.0012148354035161901, "epoch": 0.019786617200123667, "grad_norm": 0.11651170998811722, "learning_rate": 2e-07, "loss": 0.0371, "step": 212 }, { "clip_ratio/high_max": 0.002091900125378743, "clip_ratio/high_mean": 0.000989441541605629, "clip_ratio/low_mean": 0.0005216923248099192, "clip_ratio/low_min": 4.924762106384151e-05, "clip_ratio/region_mean": 0.0015111338761926163, "epoch": 0.01987995030012425, "grad_norm": 0.1365940421819687, "learning_rate": 2e-07, "loss": -0.0398, "step": 213 }, { "clip_ratio/high_max": 0.0019792670027527492, "clip_ratio/high_mean": 0.0008368796552531421, "clip_ratio/low_mean": 0.0006006825406075222, "clip_ratio/low_min": 8.510277257300913e-05, "clip_ratio/region_mean": 0.0014375622085935902, "epoch": 0.019973283400124834, "grad_norm": 0.12004130333662033, "learning_rate": 2e-07, "loss": 0.0118, "step": 214 }, { "clip_ratio/high_max": 0.0021902737134951167, "clip_ratio/high_mean": 0.000975384384219069, "clip_ratio/low_mean": 0.0005957031271464075, "clip_ratio/low_min": 1.4733615898876451e-05, "clip_ratio/region_mean": 0.00157108751591295, "epoch": 0.020066616500125416, "grad_norm": 0.1309654414653778, "learning_rate": 2e-07, "loss": -0.0069, "step": 215 }, { "clip_ratio/high_max": 0.0023474106055800803, "clip_ratio/high_mean": 0.0009294964311266085, "clip_ratio/low_mean": 0.0005873216287000105, "clip_ratio/low_min": 3.043337164854165e-05, "clip_ratio/region_mean": 0.001516818068921566, "epoch": 0.020159949600126, "grad_norm": 0.1190454363822937, "learning_rate": 2e-07, "loss": 0.0231, "step": 216 }, { "clip_ratio/high_max": 0.0019075889940722845, "clip_ratio/high_mean": 0.0007962704221426975, "clip_ratio/low_mean": 0.0006732344918418676, "clip_ratio/low_min": 3.263189910285291e-05, "clip_ratio/region_mean": 0.0014695049176225439, "epoch": 0.020253282700126583, "grad_norm": 0.11360644549131393, "learning_rate": 2e-07, "loss": 0.0529, "step": 217 }, { "clip_ratio/high_max": 0.002196177352743689, "clip_ratio/high_mean": 0.000924994150409475, "clip_ratio/low_mean": 0.0006425815399779822, "clip_ratio/low_min": 4.389654623082606e-05, "clip_ratio/region_mean": 0.0015675757022108883, "epoch": 0.020346615800127168, "grad_norm": 0.1343088448047638, "learning_rate": 2e-07, "loss": 0.019, "step": 218 }, { "clip_ratio/high_max": 0.002262892019643914, "clip_ratio/high_mean": 0.0008878271401044913, "clip_ratio/low_mean": 0.0006303579539235216, "clip_ratio/low_min": 7.65380800658022e-05, "clip_ratio/region_mean": 0.0015181850831140764, "epoch": 0.02043994890012775, "grad_norm": 0.1264265477657318, "learning_rate": 2e-07, "loss": 0.0236, "step": 219 }, { "clip_ratio/high_max": 0.0021603289133054204, "clip_ratio/high_mean": 0.0009070491578313522, "clip_ratio/low_mean": 0.000757922709453851, "clip_ratio/low_min": 4.2304596718167886e-05, "clip_ratio/region_mean": 0.001664971852733288, "epoch": 0.02053328200012833, "grad_norm": 0.12694722414016724, "learning_rate": 2e-07, "loss": 0.0311, "step": 220 }, { "clip_ratio/high_max": 0.0022762834596505854, "clip_ratio/high_mean": 0.0009774646732694237, "clip_ratio/low_mean": 0.0006077966099837795, "clip_ratio/low_min": 2.3159584088716656e-05, "clip_ratio/region_mean": 0.0015852612705202773, "epoch": 0.020626615100128916, "grad_norm": 0.12244313955307007, "learning_rate": 2e-07, "loss": -0.019, "step": 221 }, { "clip_ratio/high_max": 0.002211397029896034, "clip_ratio/high_mean": 0.0009040622499014717, "clip_ratio/low_mean": 0.0006667512880085269, "clip_ratio/low_min": 7.68256049923366e-05, "clip_ratio/region_mean": 0.0015708135033491999, "epoch": 0.020719948200129498, "grad_norm": 0.13032586872577667, "learning_rate": 2e-07, "loss": 0.0241, "step": 222 }, { "clip_ratio/high_max": 0.002108948800014332, "clip_ratio/high_mean": 0.0008103989730443573, "clip_ratio/low_mean": 0.000653546971079777, "clip_ratio/low_min": 6.348203896777704e-05, "clip_ratio/region_mean": 0.0014639459222962614, "epoch": 0.020813281300130083, "grad_norm": 0.12284176796674728, "learning_rate": 2e-07, "loss": 0.0439, "step": 223 }, { "clip_ratio/high_max": 0.00211102441244293, "clip_ratio/high_mean": 0.0007945753404783318, "clip_ratio/low_mean": 0.0006232873874978395, "clip_ratio/low_min": 4.631489900930319e-05, "clip_ratio/region_mean": 0.0014178627243381925, "epoch": 0.020906614400130665, "grad_norm": 0.11898179352283478, "learning_rate": 2e-07, "loss": 0.0375, "step": 224 }, { "clip_ratio/high_max": 0.002252078615128994, "clip_ratio/high_mean": 0.0009375910904054763, "clip_ratio/low_mean": 0.0006963056184758898, "clip_ratio/low_min": 8.316086405102396e-05, "clip_ratio/region_mean": 0.0016338967507181223, "epoch": 0.02099994750013125, "grad_norm": 0.12768003344535828, "learning_rate": 2e-07, "loss": 0.0252, "step": 225 }, { "clip_ratio/high_max": 0.002265541013912298, "clip_ratio/high_mean": 0.0008136722208291758, "clip_ratio/low_mean": 0.0006160023294796702, "clip_ratio/low_min": 4.2888646476058057e-05, "clip_ratio/region_mean": 0.0014296745357569307, "epoch": 0.021093280600131832, "grad_norm": 0.11961844563484192, "learning_rate": 2e-07, "loss": 0.0348, "step": 226 }, { "clip_ratio/high_max": 0.0019697210809681565, "clip_ratio/high_mean": 0.0008097562622424448, "clip_ratio/low_mean": 0.0006163530852063559, "clip_ratio/low_min": 1.7831669538281858e-05, "clip_ratio/region_mean": 0.001426109352905769, "epoch": 0.021186613700132417, "grad_norm": 0.12004849314689636, "learning_rate": 2e-07, "loss": 0.0196, "step": 227 }, { "clip_ratio/high_max": 0.002094672596285818, "clip_ratio/high_mean": 0.0008744771039346233, "clip_ratio/low_mean": 0.0006770050185878063, "clip_ratio/low_min": 7.927943534014048e-05, "clip_ratio/region_mean": 0.0015514821570832282, "epoch": 0.021279946800133, "grad_norm": 0.12620028853416443, "learning_rate": 2e-07, "loss": 0.0378, "step": 228 }, { "clip_ratio/high_max": 0.001894623928819783, "clip_ratio/high_mean": 0.0008117662764561828, "clip_ratio/low_mean": 0.000755739845772041, "clip_ratio/low_min": 9.920384854922304e-05, "clip_ratio/region_mean": 0.0015675060858484358, "epoch": 0.021373279900133584, "grad_norm": 0.12599003314971924, "learning_rate": 2e-07, "loss": 0.0801, "step": 229 }, { "clip_ratio/high_max": 0.0024509444119757973, "clip_ratio/high_mean": 0.0009406036078871693, "clip_ratio/low_mean": 0.0006328536401269957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015734572552901227, "epoch": 0.021466613000134166, "grad_norm": 0.1274123638868332, "learning_rate": 2e-07, "loss": -0.0088, "step": 230 }, { "clip_ratio/high_max": 0.002168775081372587, "clip_ratio/high_mean": 0.000913401207071729, "clip_ratio/low_mean": 0.0006764365334674949, "clip_ratio/low_min": 2.962235430459259e-05, "clip_ratio/region_mean": 0.0015898377751000226, "epoch": 0.02155994610013475, "grad_norm": 0.12550295889377594, "learning_rate": 2e-07, "loss": 0.0083, "step": 231 }, { "clip_ratio/high_max": 0.0023953377967700362, "clip_ratio/high_mean": 0.0009201346529152943, "clip_ratio/low_mean": 0.0005647012367262505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014848358841845766, "epoch": 0.021653279200135333, "grad_norm": 0.12987202405929565, "learning_rate": 2e-07, "loss": 0.0124, "step": 232 }, { "clip_ratio/high_max": 0.0022483248612843454, "clip_ratio/high_mean": 0.0009150952646450605, "clip_ratio/low_mean": 0.0006199993486006861, "clip_ratio/low_min": 7.445794835803099e-06, "clip_ratio/region_mean": 0.0015350945977843367, "epoch": 0.021746612300135915, "grad_norm": 0.12124481052160263, "learning_rate": 2e-07, "loss": 0.0006, "step": 233 }, { "clip_ratio/high_max": 0.0023239919464685954, "clip_ratio/high_mean": 0.0009098473856283817, "clip_ratio/low_mean": 0.0007513733671657974, "clip_ratio/low_min": 3.521126927807927e-05, "clip_ratio/region_mean": 0.001661220765527105, "epoch": 0.0218399454001365, "grad_norm": 0.1301000863313675, "learning_rate": 2e-07, "loss": 0.0346, "step": 234 }, { "clip_ratio/high_max": 0.0019248932585469447, "clip_ratio/high_mean": 0.0007889168855399475, "clip_ratio/low_mean": 0.0006668601799901808, "clip_ratio/low_min": 5.919584600633243e-05, "clip_ratio/region_mean": 0.0014557770737155806, "epoch": 0.02193327850013708, "grad_norm": 0.1178794875741005, "learning_rate": 2e-07, "loss": 0.0583, "step": 235 }, { "clip_ratio/high_max": 0.002305488622369012, "clip_ratio/high_mean": 0.0008776024496910395, "clip_ratio/low_mean": 0.0006285295276029501, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015061319572851062, "epoch": 0.022026611600137667, "grad_norm": 0.13015718758106232, "learning_rate": 2e-07, "loss": 0.0349, "step": 236 }, { "clip_ratio/high_max": 0.0018065210570057388, "clip_ratio/high_mean": 0.0007725634404778248, "clip_ratio/low_mean": 0.000714817699190462, "clip_ratio/low_min": 7.064191595418379e-05, "clip_ratio/region_mean": 0.0014873811414872762, "epoch": 0.02211994470013825, "grad_norm": 0.12058298289775848, "learning_rate": 2e-07, "loss": 0.0614, "step": 237 }, { "clip_ratio/high_max": 0.0021439868505694903, "clip_ratio/high_mean": 0.0009421758077223785, "clip_ratio/low_mean": 0.0006501825992017984, "clip_ratio/low_min": 5.7884625221049646e-05, "clip_ratio/region_mean": 0.0015923583996482193, "epoch": 0.022213277800138834, "grad_norm": 0.12698669731616974, "learning_rate": 2e-07, "loss": 0.0132, "step": 238 }, { "clip_ratio/high_max": 0.0018056137632811442, "clip_ratio/high_mean": 0.0007831873281247681, "clip_ratio/low_mean": 0.0007068192389851902, "clip_ratio/low_min": 6.146418309072033e-05, "clip_ratio/region_mean": 0.0014900065507390536, "epoch": 0.022306610900139415, "grad_norm": 0.1340022087097168, "learning_rate": 2e-07, "loss": 0.0789, "step": 239 }, { "clip_ratio/high_max": 0.0019963944105256815, "clip_ratio/high_mean": 0.0008376951664104126, "clip_ratio/low_mean": 0.000524087370649795, "clip_ratio/low_min": 1.9165900084772147e-05, "clip_ratio/region_mean": 0.0013617825279652607, "epoch": 0.02239994400014, "grad_norm": 0.11847320944070816, "learning_rate": 2e-07, "loss": -0.0115, "step": 240 }, { "clip_ratio/high_max": 0.0018225862440885976, "clip_ratio/high_mean": 0.0007996987224032637, "clip_ratio/low_mean": 0.0006301447447185637, "clip_ratio/low_min": 8.690410140843596e-05, "clip_ratio/region_mean": 0.0014298434434749652, "epoch": 0.022493277100140582, "grad_norm": 0.11881936341524124, "learning_rate": 2e-07, "loss": 0.0153, "step": 241 }, { "clip_ratio/high_max": 0.0022151446355564985, "clip_ratio/high_mean": 0.0008623783942312002, "clip_ratio/low_mean": 0.0007498102095269132, "clip_ratio/low_min": 6.236381432245253e-05, "clip_ratio/region_mean": 0.001612188592844177, "epoch": 0.022586610200141168, "grad_norm": 0.12661243975162506, "learning_rate": 2e-07, "loss": 0.0523, "step": 242 }, { "clip_ratio/high_max": 0.0022538316989084706, "clip_ratio/high_mean": 0.0009073834698938299, "clip_ratio/low_mean": 0.0006987808628764469, "clip_ratio/low_min": 8.240480292442953e-05, "clip_ratio/region_mean": 0.0016061643327702768, "epoch": 0.02267994330014175, "grad_norm": 0.12892471253871918, "learning_rate": 2e-07, "loss": -0.001, "step": 243 }, { "clip_ratio/high_max": 0.001967452837561723, "clip_ratio/high_mean": 0.0009143860334006604, "clip_ratio/low_mean": 0.000711192606104305, "clip_ratio/low_min": 8.397249530389672e-05, "clip_ratio/region_mean": 0.001625578646780923, "epoch": 0.022773276400142334, "grad_norm": 0.13051381707191467, "learning_rate": 2e-07, "loss": 0.0363, "step": 244 }, { "clip_ratio/high_max": 0.0019133603418595158, "clip_ratio/high_mean": 0.0009222814060194651, "clip_ratio/low_mean": 0.0006695762713206932, "clip_ratio/low_min": 8.68677007019869e-05, "clip_ratio/region_mean": 0.0015918576791591477, "epoch": 0.022866609500142916, "grad_norm": 0.1326800286769867, "learning_rate": 2e-07, "loss": 0.0302, "step": 245 }, { "clip_ratio/high_max": 0.0018020188254013192, "clip_ratio/high_mean": 0.0008461367906420492, "clip_ratio/low_mean": 0.0006817813409725204, "clip_ratio/low_min": 4.238460496708285e-05, "clip_ratio/region_mean": 0.0015279181388905272, "epoch": 0.022959942600143498, "grad_norm": 0.13283300399780273, "learning_rate": 2e-07, "loss": 0.038, "step": 246 }, { "clip_ratio/high_max": 0.0017917208351718727, "clip_ratio/high_mean": 0.0007762907625874504, "clip_ratio/low_mean": 0.0007857133132347371, "clip_ratio/low_min": 0.00012034440169372829, "clip_ratio/region_mean": 0.0015620040830981452, "epoch": 0.023053275700144083, "grad_norm": 0.12876766920089722, "learning_rate": 2e-07, "loss": 0.0771, "step": 247 }, { "clip_ratio/high_max": 0.0019660468969959766, "clip_ratio/high_mean": 0.0008463696940452792, "clip_ratio/low_mean": 0.0006220601255790825, "clip_ratio/low_min": 0.00011619218639680184, "clip_ratio/region_mean": 0.0014684298184874933, "epoch": 0.023146608800144665, "grad_norm": 0.1330253779888153, "learning_rate": 2e-07, "loss": 0.0379, "step": 248 }, { "clip_ratio/high_max": 0.0020015579539176542, "clip_ratio/high_mean": 0.0009329316962976009, "clip_ratio/low_mean": 0.0006521208051708527, "clip_ratio/low_min": 2.6911314762401162e-05, "clip_ratio/region_mean": 0.001585052494192496, "epoch": 0.02323994190014525, "grad_norm": 0.1300746500492096, "learning_rate": 2e-07, "loss": 0.0627, "step": 249 }, { "clip_ratio/high_max": 0.0021018820552853867, "clip_ratio/high_mean": 0.0008103759328150772, "clip_ratio/low_mean": 0.0006727500585839152, "clip_ratio/low_min": 2.696406954783015e-05, "clip_ratio/region_mean": 0.0014831259759375826, "epoch": 0.023333275000145832, "grad_norm": 0.12445419281721115, "learning_rate": 2e-07, "loss": 0.0522, "step": 250 }, { "clip_ratio/high_max": 0.0021770705752715003, "clip_ratio/high_mean": 0.000824666729386081, "clip_ratio/low_mean": 0.0006186014716149657, "clip_ratio/low_min": 4.268763132131426e-05, "clip_ratio/region_mean": 0.0014432682219194248, "epoch": 0.023426608100146417, "grad_norm": 0.11207612603902817, "learning_rate": 2e-07, "loss": 0.0217, "step": 251 }, { "clip_ratio/high_max": 0.002213042385847075, "clip_ratio/high_mean": 0.0009350182481284719, "clip_ratio/low_mean": 0.0006762640168744838, "clip_ratio/low_min": 2.5805120458244346e-05, "clip_ratio/region_mean": 0.0016112822813738603, "epoch": 0.023519941200147, "grad_norm": 0.1253548264503479, "learning_rate": 2e-07, "loss": -0.0054, "step": 252 }, { "clip_ratio/high_max": 0.0018784883941407315, "clip_ratio/high_mean": 0.0007941289841255639, "clip_ratio/low_mean": 0.0006680781734758057, "clip_ratio/low_min": 1.177246213046601e-05, "clip_ratio/region_mean": 0.0014622071612393484, "epoch": 0.023613274300147584, "grad_norm": 0.5220147967338562, "learning_rate": 2e-07, "loss": 0.0434, "step": 253 }, { "clip_ratio/high_max": 0.0024451772187603638, "clip_ratio/high_mean": 0.0009733298647915944, "clip_ratio/low_mean": 0.0007166906198108336, "clip_ratio/low_min": 8.868995791999623e-05, "clip_ratio/region_mean": 0.0016900204645935446, "epoch": 0.023706607400148166, "grad_norm": 0.12459203600883484, "learning_rate": 2e-07, "loss": 0.0523, "step": 254 }, { "clip_ratio/high_max": 0.001992321362195071, "clip_ratio/high_mean": 0.0008766978153289529, "clip_ratio/low_mean": 0.0006099466163504985, "clip_ratio/low_min": 1.991079989238642e-05, "clip_ratio/region_mean": 0.001486644414399052, "epoch": 0.02379994050014875, "grad_norm": 0.13075274229049683, "learning_rate": 2e-07, "loss": 0.0133, "step": 255 }, { "clip_ratio/high_max": 0.002099009267112706, "clip_ratio/high_mean": 0.0009792022465262562, "clip_ratio/low_mean": 0.0005995883657305967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015787906158948317, "epoch": 0.023893273600149333, "grad_norm": 0.12362360209226608, "learning_rate": 2e-07, "loss": -0.0122, "step": 256 }, { "clip_ratio/high_max": 0.001869269890448777, "clip_ratio/high_mean": 0.0007553544837719528, "clip_ratio/low_mean": 0.0005070197767054196, "clip_ratio/low_min": 2.2053752218198497e-05, "clip_ratio/region_mean": 0.0012623742550204042, "completions/clipped_ratio": 0.014761788504464302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 621.0927734375, "completions/mean_terminated_length": 569.0284423828125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.023986606700149918, "grad_norm": 0.11561097949743271, "learning_rate": 2e-07, "loss": -0.0013, "num_tokens": 244656625.0, "reward": 0.5687430500984192, "reward_std": 0.19936078786849976, "rewards/simpleverify_reward/mean": 0.5687430500984192, "rewards/simpleverify_reward/std": 0.4952539801597595, "step": 257 }, { "clip_ratio/high_max": 0.0019854989732266404, "clip_ratio/high_mean": 0.0007974641721375519, "clip_ratio/low_mean": 0.0005593140012933873, "clip_ratio/low_min": 8.648782022646628e-05, "clip_ratio/region_mean": 0.0013567781716119498, "epoch": 0.0240799398001505, "grad_norm": 0.1243252232670784, "learning_rate": 2e-07, "loss": 0.0187, "step": 258 }, { "clip_ratio/high_max": 0.0016719138839107472, "clip_ratio/high_mean": 0.0007781998083373765, "clip_ratio/low_mean": 0.00047932440702425083, "clip_ratio/low_min": 7.351211479544872e-06, "clip_ratio/region_mean": 0.0012575241962622385, "epoch": 0.02417327290015108, "grad_norm": 0.2903256118297577, "learning_rate": 2e-07, "loss": -0.002, "step": 259 }, { "clip_ratio/high_max": 0.001957925262104254, "clip_ratio/high_mean": 0.0007761007163935574, "clip_ratio/low_mean": 0.000615967179328436, "clip_ratio/low_min": 1.0657345228537451e-05, "clip_ratio/region_mean": 0.0013920679084549192, "epoch": 0.024266606000151666, "grad_norm": 0.12541061639785767, "learning_rate": 2e-07, "loss": 0.0234, "step": 260 }, { "clip_ratio/high_max": 0.001798011990103987, "clip_ratio/high_mean": 0.0006852715532659204, "clip_ratio/low_mean": 0.0004933090949634789, "clip_ratio/low_min": 4.658103716792539e-05, "clip_ratio/region_mean": 0.0011785806746047456, "epoch": 0.024359939100152248, "grad_norm": 0.1138421967625618, "learning_rate": 2e-07, "loss": 0.0117, "step": 261 }, { "clip_ratio/high_max": 0.0017228373762918636, "clip_ratio/high_mean": 0.0006643414053542074, "clip_ratio/low_mean": 0.000559452207198774, "clip_ratio/low_min": 3.1677656807005405e-05, "clip_ratio/region_mean": 0.0012237936098244973, "epoch": 0.024453272200152833, "grad_norm": 0.10744740813970566, "learning_rate": 2e-07, "loss": 0.0388, "step": 262 }, { "clip_ratio/high_max": 0.0017706188336887863, "clip_ratio/high_mean": 0.0006612868101001368, "clip_ratio/low_mean": 0.0005929264798396616, "clip_ratio/low_min": 3.248440771130845e-05, "clip_ratio/region_mean": 0.0012542132753878832, "epoch": 0.024546605300153415, "grad_norm": 0.1202259212732315, "learning_rate": 2e-07, "loss": 0.0312, "step": 263 }, { "clip_ratio/high_max": 0.0015295374068955425, "clip_ratio/high_mean": 0.0006629402414546348, "clip_ratio/low_mean": 0.00047803110737731913, "clip_ratio/low_min": 5.417177453637123e-05, "clip_ratio/region_mean": 0.0011409713224566076, "epoch": 0.024639938400154, "grad_norm": 0.10814613103866577, "learning_rate": 2e-07, "loss": 0.037, "step": 264 }, { "clip_ratio/high_max": 0.0019400609708100092, "clip_ratio/high_mean": 0.0007856115789763862, "clip_ratio/low_mean": 0.0005478906596181332, "clip_ratio/low_min": 4.0669911868462805e-05, "clip_ratio/region_mean": 0.001333502215857152, "epoch": 0.024733271500154582, "grad_norm": 0.12329860776662827, "learning_rate": 2e-07, "loss": -0.007, "step": 265 }, { "clip_ratio/high_max": 0.001895403016533237, "clip_ratio/high_mean": 0.0007664403710805345, "clip_ratio/low_mean": 0.0006475420977949398, "clip_ratio/low_min": 5.263374896458117e-05, "clip_ratio/region_mean": 0.001413982427038718, "epoch": 0.024826604600155167, "grad_norm": 0.12039011716842651, "learning_rate": 2e-07, "loss": 0.0695, "step": 266 }, { "clip_ratio/high_max": 0.0016763722815085202, "clip_ratio/high_mean": 0.0008385848814214114, "clip_ratio/low_mean": 0.0005155388389539439, "clip_ratio/low_min": 4.5927168685011566e-05, "clip_ratio/region_mean": 0.0013541237312892918, "epoch": 0.02491993770015575, "grad_norm": 0.10723930597305298, "learning_rate": 2e-07, "loss": 0.0061, "step": 267 }, { "clip_ratio/high_max": 0.0018994116835528985, "clip_ratio/high_mean": 0.0006988602644923958, "clip_ratio/low_mean": 0.0005986795495118713, "clip_ratio/low_min": 3.375337473698892e-05, "clip_ratio/region_mean": 0.0012975398312846664, "epoch": 0.025013270800156334, "grad_norm": 0.11861869692802429, "learning_rate": 2e-07, "loss": 0.0176, "step": 268 }, { "clip_ratio/high_max": 0.002124772989191115, "clip_ratio/high_mean": 0.0007490121952287154, "clip_ratio/low_mean": 0.0005637270305669517, "clip_ratio/low_min": 3.450834901741473e-05, "clip_ratio/region_mean": 0.0013127392194292042, "epoch": 0.025106603900156916, "grad_norm": 0.12164826691150665, "learning_rate": 2e-07, "loss": -0.006, "step": 269 }, { "clip_ratio/high_max": 0.0016789172404969577, "clip_ratio/high_mean": 0.0007443660251738038, "clip_ratio/low_mean": 0.0005670521550200647, "clip_ratio/low_min": 4.031072785437573e-05, "clip_ratio/region_mean": 0.0013114181820128579, "epoch": 0.0251999370001575, "grad_norm": 0.1176343485713005, "learning_rate": 2e-07, "loss": -0.0064, "step": 270 }, { "clip_ratio/high_max": 0.0015200042435026262, "clip_ratio/high_mean": 0.0006772066226403695, "clip_ratio/low_mean": 0.0005106960088596679, "clip_ratio/low_min": 2.700678578548832e-05, "clip_ratio/region_mean": 0.0011879026351380162, "epoch": 0.025293270100158083, "grad_norm": 0.10999931395053864, "learning_rate": 2e-07, "loss": 0.0421, "step": 271 }, { "clip_ratio/high_max": 0.002017531413002871, "clip_ratio/high_mean": 0.0008346976010216167, "clip_ratio/low_mean": 0.0005491766050909064, "clip_ratio/low_min": 5.048585626354907e-05, "clip_ratio/region_mean": 0.0013838742015650496, "epoch": 0.025386603200158665, "grad_norm": 0.12427344173192978, "learning_rate": 2e-07, "loss": 0.0225, "step": 272 }, { "clip_ratio/high_max": 0.0016355127081624232, "clip_ratio/high_mean": 0.0006829547037341399, "clip_ratio/low_mean": 0.0005896083912375616, "clip_ratio/low_min": 3.99093514715787e-05, "clip_ratio/region_mean": 0.0012725631022476591, "epoch": 0.02547993630015925, "grad_norm": 0.12157445400953293, "learning_rate": 2e-07, "loss": 0.0417, "step": 273 }, { "clip_ratio/high_max": 0.002293092074978631, "clip_ratio/high_mean": 0.0008798660164757166, "clip_ratio/low_mean": 0.0006503572803921998, "clip_ratio/low_min": 2.285192022100091e-05, "clip_ratio/region_mean": 0.0015302232495741919, "epoch": 0.02557326940015983, "grad_norm": 0.11832218617200851, "learning_rate": 2e-07, "loss": 0.0249, "step": 274 }, { "clip_ratio/high_max": 0.0018742052270681597, "clip_ratio/high_mean": 0.0008379823393624974, "clip_ratio/low_mean": 0.0006239262675080681, "clip_ratio/low_min": 6.075673991290387e-05, "clip_ratio/region_mean": 0.0014619086068705656, "epoch": 0.025666602500160417, "grad_norm": 0.11675229668617249, "learning_rate": 2e-07, "loss": 0.0438, "step": 275 }, { "clip_ratio/high_max": 0.002153520952560939, "clip_ratio/high_mean": 0.0007728609525656793, "clip_ratio/low_mean": 0.00061302209905989, "clip_ratio/low_min": 5.2356802370923106e-05, "clip_ratio/region_mean": 0.0013858830898243468, "epoch": 0.025759935600161, "grad_norm": 0.13468341529369354, "learning_rate": 2e-07, "loss": 0.0585, "step": 276 }, { "clip_ratio/high_max": 0.0017808724733185954, "clip_ratio/high_mean": 0.0006449294978665421, "clip_ratio/low_mean": 0.0005734244505219976, "clip_ratio/low_min": 2.304761619598139e-05, "clip_ratio/region_mean": 0.0012183539365651086, "epoch": 0.025853268700161584, "grad_norm": 0.1159195601940155, "learning_rate": 2e-07, "loss": 0.0783, "step": 277 }, { "clip_ratio/high_max": 0.0018481601946405135, "clip_ratio/high_mean": 0.0008243634620157536, "clip_ratio/low_mean": 0.0006029856831446523, "clip_ratio/low_min": 3.171281332470244e-05, "clip_ratio/region_mean": 0.00142734913242748, "epoch": 0.025946601800162165, "grad_norm": 0.12292173504829407, "learning_rate": 2e-07, "loss": 0.0196, "step": 278 }, { "clip_ratio/high_max": 0.0022643869488092605, "clip_ratio/high_mean": 0.0009244856701116078, "clip_ratio/low_mean": 0.0005498422669916181, "clip_ratio/low_min": 1.2386048183543608e-05, "clip_ratio/region_mean": 0.0014743279461981729, "epoch": 0.02603993490016275, "grad_norm": 0.11086355894804001, "learning_rate": 2e-07, "loss": -0.025, "step": 279 }, { "clip_ratio/high_max": 0.0020471166353672743, "clip_ratio/high_mean": 0.0008659021477797069, "clip_ratio/low_mean": 0.0005174417233320128, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.0013833439043082763, "epoch": 0.026133268000163332, "grad_norm": 0.11316519230604172, "learning_rate": 2e-07, "loss": -0.0122, "step": 280 }, { "clip_ratio/high_max": 0.001924006577610271, "clip_ratio/high_mean": 0.0007099168178683612, "clip_ratio/low_mean": 0.0005460791708173929, "clip_ratio/low_min": 1.7977699826587923e-05, "clip_ratio/region_mean": 0.0012559959905047435, "epoch": 0.026226601100163918, "grad_norm": 0.11418233066797256, "learning_rate": 2e-07, "loss": 0.0407, "step": 281 }, { "clip_ratio/high_max": 0.0018654860323294997, "clip_ratio/high_mean": 0.0006909910862304969, "clip_ratio/low_mean": 0.0005937097366768285, "clip_ratio/low_min": 5.021708057029173e-05, "clip_ratio/region_mean": 0.001284700814721873, "epoch": 0.0263199342001645, "grad_norm": 0.11748393625020981, "learning_rate": 2e-07, "loss": 0.0488, "step": 282 }, { "clip_ratio/high_max": 0.002000603635678999, "clip_ratio/high_mean": 0.0008267343291663565, "clip_ratio/low_mean": 0.0005999288496241206, "clip_ratio/low_min": 7.297427600860829e-05, "clip_ratio/region_mean": 0.0014266631951613817, "epoch": 0.026413267300165084, "grad_norm": 0.12817393243312836, "learning_rate": 2e-07, "loss": 0.0094, "step": 283 }, { "clip_ratio/high_max": 0.0020890901796519756, "clip_ratio/high_mean": 0.0007941197418404045, "clip_ratio/low_mean": 0.0006186597793202964, "clip_ratio/low_min": 3.2501159239473054e-05, "clip_ratio/region_mean": 0.0014127795366221108, "epoch": 0.026506600400165666, "grad_norm": 0.11483299732208252, "learning_rate": 2e-07, "loss": 0.0202, "step": 284 }, { "clip_ratio/high_max": 0.001989562915696297, "clip_ratio/high_mean": 0.0007614011028636014, "clip_ratio/low_mean": 0.0005384645783124142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012998657039133832, "epoch": 0.026599933500166248, "grad_norm": 0.12066849321126938, "learning_rate": 2e-07, "loss": -0.0087, "step": 285 }, { "clip_ratio/high_max": 0.0018384189170319587, "clip_ratio/high_mean": 0.0007701898084633285, "clip_ratio/low_mean": 0.0005567612979575642, "clip_ratio/low_min": 3.270080742368009e-05, "clip_ratio/region_mean": 0.001326951154624112, "epoch": 0.026693266600166833, "grad_norm": 0.11334282904863358, "learning_rate": 2e-07, "loss": 0.0134, "step": 286 }, { "clip_ratio/high_max": 0.0018156390724470839, "clip_ratio/high_mean": 0.0007659326784050791, "clip_ratio/low_mean": 0.0006194413454068126, "clip_ratio/low_min": 4.5179614062362816e-05, "clip_ratio/region_mean": 0.0013853740274498705, "epoch": 0.026786599700167415, "grad_norm": 0.1245957687497139, "learning_rate": 2e-07, "loss": 0.038, "step": 287 }, { "clip_ratio/high_max": 0.0019307895199744962, "clip_ratio/high_mean": 0.0007282035548996646, "clip_ratio/low_mean": 0.0005799779301014496, "clip_ratio/low_min": 1.5599001926602796e-05, "clip_ratio/region_mean": 0.001308181534113828, "epoch": 0.026879932800168, "grad_norm": 0.11065205931663513, "learning_rate": 2e-07, "loss": 0.0183, "step": 288 }, { "clip_ratio/high_max": 0.0016762155864853412, "clip_ratio/high_mean": 0.0006930882755113998, "clip_ratio/low_mean": 0.0006148903239591164, "clip_ratio/low_min": 5.532817067432916e-05, "clip_ratio/region_mean": 0.0013079785894660745, "epoch": 0.026973265900168582, "grad_norm": 0.10208636522293091, "learning_rate": 2e-07, "loss": 0.0828, "step": 289 }, { "clip_ratio/high_max": 0.0019060141748923343, "clip_ratio/high_mean": 0.0008040141065066564, "clip_ratio/low_mean": 0.0006279657500272151, "clip_ratio/low_min": 1.1474206075945403e-05, "clip_ratio/region_mean": 0.001431979766493896, "epoch": 0.027066599000169167, "grad_norm": 0.129505455493927, "learning_rate": 2e-07, "loss": 0.0046, "step": 290 }, { "clip_ratio/high_max": 0.001824366489017848, "clip_ratio/high_mean": 0.0008061655498750042, "clip_ratio/low_mean": 0.0006316633989627007, "clip_ratio/low_min": 1.6209748082474107e-05, "clip_ratio/region_mean": 0.0014378289561136626, "epoch": 0.02715993210016975, "grad_norm": 0.12492282688617706, "learning_rate": 2e-07, "loss": 0.031, "step": 291 }, { "clip_ratio/high_max": 0.0021610963594866917, "clip_ratio/high_mean": 0.0007835951673769159, "clip_ratio/low_mean": 0.0005685682899638778, "clip_ratio/low_min": 3.461108917690581e-05, "clip_ratio/region_mean": 0.0013521634646167513, "epoch": 0.027253265200170334, "grad_norm": 0.12403958290815353, "learning_rate": 2e-07, "loss": 0.0248, "step": 292 }, { "clip_ratio/high_max": 0.0020282550576666836, "clip_ratio/high_mean": 0.0007788134425936732, "clip_ratio/low_mean": 0.0006941737656234181, "clip_ratio/low_min": 7.969918806338683e-05, "clip_ratio/region_mean": 0.0014729871872987133, "epoch": 0.027346598300170916, "grad_norm": 0.11742532253265381, "learning_rate": 2e-07, "loss": 0.0258, "step": 293 }, { "clip_ratio/high_max": 0.00199810119374888, "clip_ratio/high_mean": 0.0007621298209414817, "clip_ratio/low_mean": 0.0006303685840975959, "clip_ratio/low_min": 5.319286719895899e-05, "clip_ratio/region_mean": 0.0013924984014010988, "epoch": 0.0274399314001715, "grad_norm": 0.11730988323688507, "learning_rate": 2e-07, "loss": 0.0623, "step": 294 }, { "clip_ratio/high_max": 0.0017734961038513575, "clip_ratio/high_mean": 0.0007222091408038978, "clip_ratio/low_mean": 0.0006572656675416511, "clip_ratio/low_min": 2.2484709916170686e-05, "clip_ratio/region_mean": 0.0013794747937936336, "epoch": 0.027533264500172083, "grad_norm": 0.11441951990127563, "learning_rate": 2e-07, "loss": 0.0486, "step": 295 }, { "clip_ratio/high_max": 0.001923352807352785, "clip_ratio/high_mean": 0.0007743161822872935, "clip_ratio/low_mean": 0.0006506190547952428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014249352243496105, "epoch": 0.027626597600172668, "grad_norm": 0.10988679528236389, "learning_rate": 2e-07, "loss": 0.0228, "step": 296 }, { "clip_ratio/high_max": 0.0019848877855110914, "clip_ratio/high_mean": 0.0008030367553146789, "clip_ratio/low_mean": 0.0005408591969171539, "clip_ratio/low_min": 8.588704076828435e-06, "clip_ratio/region_mean": 0.0013438959686027374, "epoch": 0.02771993070017325, "grad_norm": 0.126237690448761, "learning_rate": 2e-07, "loss": 0.0167, "step": 297 }, { "clip_ratio/high_max": 0.0017423484932805877, "clip_ratio/high_mean": 0.0007889506468927721, "clip_ratio/low_mean": 0.0005747848390456056, "clip_ratio/low_min": 3.9272506000997964e-05, "clip_ratio/region_mean": 0.0013637354932143353, "epoch": 0.02781326380017383, "grad_norm": 0.12455515563488007, "learning_rate": 2e-07, "loss": 0.02, "step": 298 }, { "clip_ratio/high_max": 0.0017230886624020059, "clip_ratio/high_mean": 0.0007314204813155811, "clip_ratio/low_mean": 0.0005347597507352475, "clip_ratio/low_min": 1.9311653886688873e-05, "clip_ratio/region_mean": 0.0012661802393267862, "epoch": 0.027906596900174416, "grad_norm": 0.11264623701572418, "learning_rate": 2e-07, "loss": 0.0329, "step": 299 }, { "clip_ratio/high_max": 0.0019663340426632203, "clip_ratio/high_mean": 0.0007762718187223072, "clip_ratio/low_mean": 0.0006374502590915654, "clip_ratio/low_min": 6.776236750738462e-05, "clip_ratio/region_mean": 0.0014137220787233673, "epoch": 0.027999930000174998, "grad_norm": 0.12365193665027618, "learning_rate": 2e-07, "loss": 0.0544, "step": 300 }, { "clip_ratio/high_max": 0.0020610824067262, "clip_ratio/high_mean": 0.0008462089499516878, "clip_ratio/low_mean": 0.0005537785564229125, "clip_ratio/low_min": 6.071273764973739e-05, "clip_ratio/region_mean": 0.0013999874936416745, "epoch": 0.028093263100175583, "grad_norm": 0.11349142342805862, "learning_rate": 2e-07, "loss": 0.0063, "step": 301 }, { "clip_ratio/high_max": 0.0018935884545498993, "clip_ratio/high_mean": 0.0008074876404862152, "clip_ratio/low_mean": 0.0006114487805461977, "clip_ratio/low_min": 8.214577064791229e-05, "clip_ratio/region_mean": 0.00141893643012736, "epoch": 0.028186596200176165, "grad_norm": 0.12265170365571976, "learning_rate": 2e-07, "loss": 0.0331, "step": 302 }, { "clip_ratio/high_max": 0.0017851639713626355, "clip_ratio/high_mean": 0.0007438326447299914, "clip_ratio/low_mean": 0.0006346934405883076, "clip_ratio/low_min": 2.319697887287475e-05, "clip_ratio/region_mean": 0.0013785260234726593, "epoch": 0.02827992930017675, "grad_norm": 0.1156851202249527, "learning_rate": 2e-07, "loss": 0.0363, "step": 303 }, { "clip_ratio/high_max": 0.0015497145541303325, "clip_ratio/high_mean": 0.0006460076001530979, "clip_ratio/low_mean": 0.0005720689350710018, "clip_ratio/low_min": 4.790229741047369e-05, "clip_ratio/region_mean": 0.0012180765334051102, "epoch": 0.028373262400177332, "grad_norm": 0.10869140923023224, "learning_rate": 2e-07, "loss": 0.0559, "step": 304 }, { "clip_ratio/high_max": 0.002169876534026116, "clip_ratio/high_mean": 0.0009460025539738126, "clip_ratio/low_mean": 0.0004782475689353305, "clip_ratio/low_min": 4.207418533042073e-05, "clip_ratio/region_mean": 0.0014242500983527862, "epoch": 0.028466595500177917, "grad_norm": 0.12141937017440796, "learning_rate": 2e-07, "loss": -0.0167, "step": 305 }, { "clip_ratio/high_max": 0.00216663414175855, "clip_ratio/high_mean": 0.0008627387142041698, "clip_ratio/low_mean": 0.0006097215100453468, "clip_ratio/low_min": 5.1454970616759965e-05, "clip_ratio/region_mean": 0.001472460226068506, "epoch": 0.0285599286001785, "grad_norm": 0.12035830318927765, "learning_rate": 2e-07, "loss": 0.0161, "step": 306 }, { "clip_ratio/high_max": 0.0017779905247152783, "clip_ratio/high_mean": 0.000807691261798027, "clip_ratio/low_mean": 0.0005866352566954447, "clip_ratio/low_min": 2.2033443201507907e-05, "clip_ratio/region_mean": 0.0013943265585112385, "epoch": 0.028653261700179084, "grad_norm": 0.12182717025279999, "learning_rate": 2e-07, "loss": 0.0173, "step": 307 }, { "clip_ratio/high_max": 0.0019325420835230034, "clip_ratio/high_mean": 0.0007482777709810762, "clip_ratio/low_mean": 0.0005826978385812254, "clip_ratio/low_min": 2.8758774533343967e-05, "clip_ratio/region_mean": 0.0013309756213857327, "epoch": 0.028746594800179666, "grad_norm": 0.1085725948214531, "learning_rate": 2e-07, "loss": 0.0103, "step": 308 }, { "clip_ratio/high_max": 0.0022622082105954178, "clip_ratio/high_mean": 0.000824645881948527, "clip_ratio/low_mean": 0.0006643894121225458, "clip_ratio/low_min": 4.437986081029521e-05, "clip_ratio/region_mean": 0.0014890353195369244, "epoch": 0.02883992790018025, "grad_norm": 0.12374410778284073, "learning_rate": 2e-07, "loss": 0.055, "step": 309 }, { "clip_ratio/high_max": 0.0016471276539959945, "clip_ratio/high_mean": 0.0007477908857254079, "clip_ratio/low_mean": 0.0006039272857378819, "clip_ratio/low_min": 3.150043085042853e-05, "clip_ratio/region_mean": 0.0013517181905626785, "epoch": 0.028933261000180833, "grad_norm": 0.11700389534235, "learning_rate": 2e-07, "loss": -0.0052, "step": 310 }, { "clip_ratio/high_max": 0.0019830786986858584, "clip_ratio/high_mean": 0.0007303611819224898, "clip_ratio/low_mean": 0.0005711381818400696, "clip_ratio/low_min": 4.232197807141347e-05, "clip_ratio/region_mean": 0.001301499352848623, "epoch": 0.029026594100181415, "grad_norm": 0.1179599016904831, "learning_rate": 2e-07, "loss": 0.0273, "step": 311 }, { "clip_ratio/high_max": 0.0016685055888956413, "clip_ratio/high_mean": 0.0007133502240321832, "clip_ratio/low_mean": 0.0005924419310758822, "clip_ratio/low_min": 3.695352324939449e-05, "clip_ratio/region_mean": 0.0013057921787549276, "epoch": 0.029119927200182, "grad_norm": 0.13482874631881714, "learning_rate": 2e-07, "loss": -0.0159, "step": 312 }, { "clip_ratio/high_max": 0.0018655500462045893, "clip_ratio/high_mean": 0.0008256774217443308, "clip_ratio/low_mean": 0.0005759813893746468, "clip_ratio/low_min": 7.257852757902583e-05, "clip_ratio/region_mean": 0.001401658810209483, "epoch": 0.02921326030018258, "grad_norm": 0.10839033871889114, "learning_rate": 2e-07, "loss": -0.0024, "step": 313 }, { "clip_ratio/high_max": 0.0017268187111767475, "clip_ratio/high_mean": 0.0007236371584440349, "clip_ratio/low_mean": 0.0006265372248890344, "clip_ratio/low_min": 7.67276760598179e-05, "clip_ratio/region_mean": 0.0013501744251698256, "epoch": 0.029306593400183167, "grad_norm": 0.12443891912698746, "learning_rate": 2e-07, "loss": 0.0066, "step": 314 }, { "clip_ratio/high_max": 0.0018528828441048972, "clip_ratio/high_mean": 0.0007677442517888267, "clip_ratio/low_mean": 0.0005768253167843795, "clip_ratio/low_min": 3.8201259485504124e-05, "clip_ratio/region_mean": 0.0013445695803966373, "epoch": 0.02939992650018375, "grad_norm": 0.1159597635269165, "learning_rate": 2e-07, "loss": 0.0137, "step": 315 }, { "clip_ratio/high_max": 0.001895471923489822, "clip_ratio/high_mean": 0.0008120604124997044, "clip_ratio/low_mean": 0.0005665475355272065, "clip_ratio/low_min": 3.500260572764091e-05, "clip_ratio/region_mean": 0.0013786079653073102, "epoch": 0.029493259600184334, "grad_norm": 0.1261255443096161, "learning_rate": 2e-07, "loss": 0.0135, "step": 316 }, { "clip_ratio/high_max": 0.001565489117638208, "clip_ratio/high_mean": 0.0006448101321439026, "clip_ratio/low_mean": 0.0006233360254555009, "clip_ratio/low_min": 2.731101449171547e-05, "clip_ratio/region_mean": 0.0012681461230386049, "epoch": 0.029586592700184915, "grad_norm": 0.11905649304389954, "learning_rate": 2e-07, "loss": 0.0274, "step": 317 }, { "clip_ratio/high_max": 0.0018058292116620578, "clip_ratio/high_mean": 0.0006378151356329909, "clip_ratio/low_mean": 0.0005901934309804346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001228008546604542, "epoch": 0.0296799258001855, "grad_norm": 0.1080046221613884, "learning_rate": 2e-07, "loss": 0.0645, "step": 318 }, { "clip_ratio/high_max": 0.0020870286753051914, "clip_ratio/high_mean": 0.0008321259974763962, "clip_ratio/low_mean": 0.0006464874877565308, "clip_ratio/low_min": 4.716136936622206e-05, "clip_ratio/region_mean": 0.0014786135143367574, "epoch": 0.029773258900186082, "grad_norm": 0.12716934084892273, "learning_rate": 2e-07, "loss": 0.0362, "step": 319 }, { "clip_ratio/high_max": 0.0019575896440073848, "clip_ratio/high_mean": 0.0008432189861196093, "clip_ratio/low_mean": 0.0005437746713141678, "clip_ratio/low_min": 1.0867674973269459e-05, "clip_ratio/region_mean": 0.0013869936738046817, "epoch": 0.029866592000186667, "grad_norm": 0.12483929842710495, "learning_rate": 2e-07, "loss": -0.0296, "step": 320 }, { "clip_ratio/high_max": 0.0019093683076789603, "clip_ratio/high_mean": 0.0008273908515548101, "clip_ratio/low_mean": 0.0006479590902017662, "clip_ratio/low_min": 5.760748535976745e-05, "clip_ratio/region_mean": 0.0014753499235666823, "epoch": 0.02995992510018725, "grad_norm": 0.1262880563735962, "learning_rate": 2e-07, "loss": 0.0281, "step": 321 }, { "clip_ratio/high_max": 0.0021952798051643185, "clip_ratio/high_mean": 0.0009407220150023932, "clip_ratio/low_mean": 0.0005884859983780188, "clip_ratio/low_min": 2.574689278844744e-05, "clip_ratio/region_mean": 0.0015292080242943484, "epoch": 0.030053258200187834, "grad_norm": 0.13950613141059875, "learning_rate": 2e-07, "loss": 0.0026, "step": 322 }, { "clip_ratio/high_max": 0.0018082228489220142, "clip_ratio/high_mean": 0.0007628774783370318, "clip_ratio/low_mean": 0.0005792008296339191, "clip_ratio/low_min": 1.193659318232676e-05, "clip_ratio/region_mean": 0.0013420783216133714, "epoch": 0.030146591300188416, "grad_norm": 0.125337615609169, "learning_rate": 2e-07, "loss": 0.0502, "step": 323 }, { "clip_ratio/high_max": 0.0020202920968586113, "clip_ratio/high_mean": 0.000845012425997993, "clip_ratio/low_mean": 0.0006102746265241876, "clip_ratio/low_min": 7.77556579123484e-06, "clip_ratio/region_mean": 0.0014552870670740958, "epoch": 0.030239924400188998, "grad_norm": 0.1325664073228836, "learning_rate": 2e-07, "loss": 0.0041, "step": 324 }, { "clip_ratio/high_max": 0.0018722272507147864, "clip_ratio/high_mean": 0.0007003525897744112, "clip_ratio/low_mean": 0.0005809882077301154, "clip_ratio/low_min": 1.833382157201413e-05, "clip_ratio/region_mean": 0.0012813407665817067, "epoch": 0.030333257500189583, "grad_norm": 0.11880411952733994, "learning_rate": 2e-07, "loss": 0.0233, "step": 325 }, { "clip_ratio/high_max": 0.0016914101070142351, "clip_ratio/high_mean": 0.0007185336926340824, "clip_ratio/low_mean": 0.0006415384577849181, "clip_ratio/low_min": 4.44346405856777e-05, "clip_ratio/region_mean": 0.0013600721358670853, "epoch": 0.030426590600190165, "grad_norm": 0.11393184959888458, "learning_rate": 2e-07, "loss": 0.042, "step": 326 }, { "clip_ratio/high_max": 0.0018389952201687265, "clip_ratio/high_mean": 0.0007157902073231526, "clip_ratio/low_mean": 0.0007002808142715367, "clip_ratio/low_min": 2.883483648474794e-05, "clip_ratio/region_mean": 0.0014160710270516574, "epoch": 0.03051992370019075, "grad_norm": 0.12140263617038727, "learning_rate": 2e-07, "loss": 0.0598, "step": 327 }, { "clip_ratio/high_max": 0.0018691142286115792, "clip_ratio/high_mean": 0.000827059517177986, "clip_ratio/low_mean": 0.0006826757971793995, "clip_ratio/low_min": 5.599279666057555e-05, "clip_ratio/region_mean": 0.0015097352843440603, "epoch": 0.030613256800191332, "grad_norm": 0.12040097266435623, "learning_rate": 2e-07, "loss": 0.0204, "step": 328 }, { "clip_ratio/high_max": 0.0020709427626570687, "clip_ratio/high_mean": 0.0008130800179060316, "clip_ratio/low_mean": 0.0007018190626695286, "clip_ratio/low_min": 8.159587923728395e-05, "clip_ratio/region_mean": 0.0015148990787565708, "epoch": 0.030706589900191917, "grad_norm": 0.11802371591329575, "learning_rate": 2e-07, "loss": 0.0607, "step": 329 }, { "clip_ratio/high_max": 0.0017908496265590657, "clip_ratio/high_mean": 0.000838683801703155, "clip_ratio/low_mean": 0.0007185102494986495, "clip_ratio/low_min": 5.549161687667947e-05, "clip_ratio/region_mean": 0.0015571940093650483, "epoch": 0.0307999230001925, "grad_norm": 0.13418221473693848, "learning_rate": 2e-07, "loss": 0.0283, "step": 330 }, { "clip_ratio/high_max": 0.0019069183654210065, "clip_ratio/high_mean": 0.0007655143290321575, "clip_ratio/low_mean": 0.0007304333967113052, "clip_ratio/low_min": 0.00010650184276528307, "clip_ratio/region_mean": 0.0014959477412048727, "epoch": 0.030893256100193084, "grad_norm": 0.13436955213546753, "learning_rate": 2e-07, "loss": 0.0441, "step": 331 }, { "clip_ratio/high_max": 0.002172822358261328, "clip_ratio/high_mean": 0.0008424574225500692, "clip_ratio/low_mean": 0.000788283223300823, "clip_ratio/low_min": 7.499276216549333e-05, "clip_ratio/region_mean": 0.0016307406694977544, "epoch": 0.030986589200193666, "grad_norm": 0.14253520965576172, "learning_rate": 2e-07, "loss": 0.0264, "step": 332 }, { "clip_ratio/high_max": 0.0022013826019247063, "clip_ratio/high_mean": 0.0008953339474828681, "clip_ratio/low_mean": 0.0007051754437270574, "clip_ratio/low_min": 1.4181982805894222e-05, "clip_ratio/region_mean": 0.0016005094184947666, "epoch": 0.03107992230019425, "grad_norm": 0.13072951138019562, "learning_rate": 2e-07, "loss": 0.0567, "step": 333 }, { "clip_ratio/high_max": 0.0017931079673871864, "clip_ratio/high_mean": 0.0008081988426056341, "clip_ratio/low_mean": 0.000618670737821958, "clip_ratio/low_min": 8.229092509282054e-05, "clip_ratio/region_mean": 0.0014268696068029385, "epoch": 0.031173255400194833, "grad_norm": 0.13211628794670105, "learning_rate": 2e-07, "loss": 0.077, "step": 334 }, { "clip_ratio/high_max": 0.0017426550184609368, "clip_ratio/high_mean": 0.0008040963293751702, "clip_ratio/low_mean": 0.0005453411849885015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013494375089067034, "epoch": 0.03126658850019542, "grad_norm": 0.11757729202508926, "learning_rate": 2e-07, "loss": 0.0053, "step": 335 }, { "clip_ratio/high_max": 0.0017707709448586684, "clip_ratio/high_mean": 0.0007418416262225946, "clip_ratio/low_mean": 0.0006487011178251123, "clip_ratio/low_min": 5.6116896303137764e-05, "clip_ratio/region_mean": 0.0013905427294957917, "epoch": 0.031359921600196, "grad_norm": 0.13273580372333527, "learning_rate": 2e-07, "loss": 0.0761, "step": 336 }, { "clip_ratio/high_max": 0.0020378922345116735, "clip_ratio/high_mean": 0.0009189400407194626, "clip_ratio/low_mean": 0.0006542112496390473, "clip_ratio/low_min": 2.9296424145286437e-05, "clip_ratio/region_mean": 0.0015731512903585099, "epoch": 0.03145325470019658, "grad_norm": 0.12876418232917786, "learning_rate": 2e-07, "loss": -0.035, "step": 337 }, { "clip_ratio/high_max": 0.0021753712971985806, "clip_ratio/high_mean": 0.0008455993611278245, "clip_ratio/low_mean": 0.0006070604049455142, "clip_ratio/low_min": 1.780763113856665e-05, "clip_ratio/region_mean": 0.0014526597769872751, "epoch": 0.031546587800197166, "grad_norm": 0.13564316928386688, "learning_rate": 2e-07, "loss": 0.0391, "step": 338 }, { "clip_ratio/high_max": 0.0023537035667686723, "clip_ratio/high_mean": 0.0009317334988736548, "clip_ratio/low_mean": 0.0006164165497466456, "clip_ratio/low_min": 7.392971565423068e-05, "clip_ratio/region_mean": 0.0015481500340683851, "epoch": 0.03163992090019775, "grad_norm": 0.12651567161083221, "learning_rate": 2e-07, "loss": -0.0261, "step": 339 }, { "clip_ratio/high_max": 0.0018078884768328862, "clip_ratio/high_mean": 0.0006821720926382113, "clip_ratio/low_mean": 0.0007266493194038048, "clip_ratio/low_min": 2.1878348434256623e-05, "clip_ratio/region_mean": 0.0014088214193179738, "epoch": 0.03173325400019833, "grad_norm": 0.12444819509983063, "learning_rate": 2e-07, "loss": 0.0829, "step": 340 }, { "clip_ratio/high_max": 0.0019344561915204395, "clip_ratio/high_mean": 0.0007698418703512289, "clip_ratio/low_mean": 0.0005962260884189163, "clip_ratio/low_min": 2.3273132683243603e-05, "clip_ratio/region_mean": 0.0013660680087923538, "epoch": 0.031826587100198915, "grad_norm": 0.11317906528711319, "learning_rate": 2e-07, "loss": 0.0177, "step": 341 }, { "clip_ratio/high_max": 0.0019452507694950327, "clip_ratio/high_mean": 0.0008471414002997335, "clip_ratio/low_mean": 0.0006604988084291108, "clip_ratio/low_min": 5.4606490266451146e-05, "clip_ratio/region_mean": 0.0015076401978149079, "epoch": 0.0319199202001995, "grad_norm": 0.13797108829021454, "learning_rate": 2e-07, "loss": 0.0243, "step": 342 }, { "clip_ratio/high_max": 0.0017152908258140087, "clip_ratio/high_mean": 0.0007821099370630691, "clip_ratio/low_mean": 0.0005885351965844166, "clip_ratio/low_min": 5.3610104259860236e-05, "clip_ratio/region_mean": 0.0013706451463804115, "epoch": 0.032013253300200085, "grad_norm": 0.11515862494707108, "learning_rate": 2e-07, "loss": 0.0174, "step": 343 }, { "clip_ratio/high_max": 0.0018824800936272368, "clip_ratio/high_mean": 0.0007774492987664416, "clip_ratio/low_mean": 0.0006851958914921852, "clip_ratio/low_min": 3.6656313568528276e-05, "clip_ratio/region_mean": 0.0014626452102675103, "epoch": 0.032106586400200664, "grad_norm": 0.12402620166540146, "learning_rate": 2e-07, "loss": 0.0242, "step": 344 }, { "clip_ratio/high_max": 0.0026173783553531393, "clip_ratio/high_mean": 0.0008851697493810207, "clip_ratio/low_mean": 0.000640038117126096, "clip_ratio/low_min": 1.6873649656190537e-05, "clip_ratio/region_mean": 0.0015252078919729684, "epoch": 0.03219991950020125, "grad_norm": 0.15637467801570892, "learning_rate": 2e-07, "loss": 0.0251, "step": 345 }, { "clip_ratio/high_max": 0.002263232228870038, "clip_ratio/high_mean": 0.0009399782029504422, "clip_ratio/low_mean": 0.0006482475000666454, "clip_ratio/low_min": 7.782343345752452e-06, "clip_ratio/region_mean": 0.0015882257102930453, "epoch": 0.032293252600201834, "grad_norm": 0.1303851306438446, "learning_rate": 2e-07, "loss": 0.0257, "step": 346 }, { "clip_ratio/high_max": 0.0019814482548099477, "clip_ratio/high_mean": 0.0007946911409817403, "clip_ratio/low_mean": 0.0006722885691488045, "clip_ratio/low_min": 5.543444149225252e-05, "clip_ratio/region_mean": 0.0014669797601527534, "epoch": 0.03238658570020242, "grad_norm": 0.12173054367303848, "learning_rate": 2e-07, "loss": 0.0302, "step": 347 }, { "clip_ratio/high_max": 0.0018946742129628547, "clip_ratio/high_mean": 0.0007601962588523747, "clip_ratio/low_mean": 0.0006497679405583767, "clip_ratio/low_min": 2.7570488327910425e-05, "clip_ratio/region_mean": 0.0014099641848588362, "epoch": 0.032479918800203, "grad_norm": 0.1303040236234665, "learning_rate": 2e-07, "loss": 0.0383, "step": 348 }, { "clip_ratio/high_max": 0.0017901070350490045, "clip_ratio/high_mean": 0.0007549113397544716, "clip_ratio/low_mean": 0.0006227148787729675, "clip_ratio/low_min": 4.3878568249056116e-05, "clip_ratio/region_mean": 0.001377626191242598, "epoch": 0.03257325190020358, "grad_norm": 0.1619393229484558, "learning_rate": 2e-07, "loss": 0.0362, "step": 349 }, { "clip_ratio/high_max": 0.0020116166924708523, "clip_ratio/high_mean": 0.0008072157988863182, "clip_ratio/low_mean": 0.0006152933747216593, "clip_ratio/low_min": 1.0111632036569063e-05, "clip_ratio/region_mean": 0.0014225091581465676, "epoch": 0.03266658500020417, "grad_norm": 0.1221124455332756, "learning_rate": 2e-07, "loss": 0.0143, "step": 350 }, { "clip_ratio/high_max": 0.0021453488297993317, "clip_ratio/high_mean": 0.0007960320326674264, "clip_ratio/low_mean": 0.0005874770631635329, "clip_ratio/low_min": 3.421721794438781e-05, "clip_ratio/region_mean": 0.0013835090794600546, "epoch": 0.032759918100204746, "grad_norm": 0.12113963812589645, "learning_rate": 2e-07, "loss": 0.0125, "step": 351 }, { "clip_ratio/high_max": 0.0023286609139177017, "clip_ratio/high_mean": 0.0008069346658885479, "clip_ratio/low_mean": 0.0006730368531862041, "clip_ratio/low_min": 2.5416436074010562e-05, "clip_ratio/region_mean": 0.0014799715063418262, "epoch": 0.03285325120020533, "grad_norm": 0.12016429007053375, "learning_rate": 2e-07, "loss": 0.0323, "step": 352 }, { "clip_ratio/high_max": 0.002018118157138815, "clip_ratio/high_mean": 0.0007823203650332289, "clip_ratio/low_mean": 0.0006056782867744914, "clip_ratio/low_min": 3.2891550290514715e-05, "clip_ratio/region_mean": 0.0013879986508982256, "epoch": 0.03294658430020592, "grad_norm": 0.12529417872428894, "learning_rate": 2e-07, "loss": -0.008, "step": 353 }, { "clip_ratio/high_max": 0.001547041121739312, "clip_ratio/high_mean": 0.0007216818921733648, "clip_ratio/low_mean": 0.0006577965104952455, "clip_ratio/low_min": 1.9187643374607433e-05, "clip_ratio/region_mean": 0.0013794783808407374, "epoch": 0.0330399174002065, "grad_norm": 0.11982711404561996, "learning_rate": 2e-07, "loss": 0.01, "step": 354 }, { "clip_ratio/high_max": 0.002014764970226679, "clip_ratio/high_mean": 0.0008688471989444224, "clip_ratio/low_mean": 0.0006997004511504201, "clip_ratio/low_min": 4.0232356695923954e-05, "clip_ratio/region_mean": 0.0015685476246289909, "epoch": 0.03313325050020708, "grad_norm": 0.1305162012577057, "learning_rate": 2e-07, "loss": 0.0095, "step": 355 }, { "clip_ratio/high_max": 0.0019054939584748354, "clip_ratio/high_mean": 0.0007127008466341067, "clip_ratio/low_mean": 0.0006586289127881173, "clip_ratio/low_min": 4.5386538658931386e-05, "clip_ratio/region_mean": 0.001371329777612118, "epoch": 0.033226583600207665, "grad_norm": 0.11985760927200317, "learning_rate": 2e-07, "loss": 0.0155, "step": 356 }, { "clip_ratio/high_max": 0.0020804283922188915, "clip_ratio/high_mean": 0.0009287571774621028, "clip_ratio/low_mean": 0.0006215613921085605, "clip_ratio/low_min": 3.508260124363005e-05, "clip_ratio/region_mean": 0.0015503185350098647, "epoch": 0.03331991670020825, "grad_norm": 0.13267309963703156, "learning_rate": 2e-07, "loss": 0.0086, "step": 357 }, { "clip_ratio/high_max": 0.001969245964573929, "clip_ratio/high_mean": 0.0007912969977041939, "clip_ratio/low_mean": 0.0006782705168006942, "clip_ratio/low_min": 9.580519053997705e-05, "clip_ratio/region_mean": 0.0014695674908580258, "epoch": 0.033413249800208836, "grad_norm": 0.11836139857769012, "learning_rate": 2e-07, "loss": 0.0727, "step": 358 }, { "clip_ratio/high_max": 0.0019048401991312858, "clip_ratio/high_mean": 0.0007870481495046988, "clip_ratio/low_mean": 0.0007177717307058629, "clip_ratio/low_min": 6.116804524936015e-05, "clip_ratio/region_mean": 0.00150481985474471, "epoch": 0.033506582900209414, "grad_norm": 0.13217952847480774, "learning_rate": 2e-07, "loss": 0.0114, "step": 359 }, { "clip_ratio/high_max": 0.0020297840783314314, "clip_ratio/high_mean": 0.0008608384832768934, "clip_ratio/low_mean": 0.0006077831158108893, "clip_ratio/low_min": 7.204610938060796e-06, "clip_ratio/region_mean": 0.0014686216236441396, "epoch": 0.03359991600021, "grad_norm": 0.12482540309429169, "learning_rate": 2e-07, "loss": -0.0304, "step": 360 }, { "clip_ratio/high_max": 0.0020188749713270226, "clip_ratio/high_mean": 0.0008818371061352082, "clip_ratio/low_mean": 0.0006707772008667234, "clip_ratio/low_min": 1.3745326214120723e-05, "clip_ratio/region_mean": 0.0015526143433817197, "epoch": 0.033693249100210584, "grad_norm": 0.141623392701149, "learning_rate": 2e-07, "loss": 0.0005, "step": 361 }, { "clip_ratio/high_max": 0.002167911152355373, "clip_ratio/high_mean": 0.0008037817915464984, "clip_ratio/low_mean": 0.0007586861047457205, "clip_ratio/low_min": 8.293248174595647e-05, "clip_ratio/region_mean": 0.0015624679108441342, "epoch": 0.03378658220021117, "grad_norm": 0.13175365328788757, "learning_rate": 2e-07, "loss": 0.0022, "step": 362 }, { "clip_ratio/high_max": 0.002149682135495823, "clip_ratio/high_mean": 0.0008718295575818047, "clip_ratio/low_mean": 0.0006655684242105053, "clip_ratio/low_min": 4.600217471306678e-05, "clip_ratio/region_mean": 0.0015373979949799832, "epoch": 0.03387991530021175, "grad_norm": 0.12787693738937378, "learning_rate": 2e-07, "loss": 0.0078, "step": 363 }, { "clip_ratio/high_max": 0.002148786064935848, "clip_ratio/high_mean": 0.0008514384680893272, "clip_ratio/low_mean": 0.0005827631025567825, "clip_ratio/low_min": 2.6737967345979996e-05, "clip_ratio/region_mean": 0.001434201574738836, "epoch": 0.03397324840021233, "grad_norm": 0.11575324833393097, "learning_rate": 2e-07, "loss": -0.0104, "step": 364 }, { "clip_ratio/high_max": 0.0017832515404734295, "clip_ratio/high_mean": 0.000741971110983286, "clip_ratio/low_mean": 0.0006728530370310182, "clip_ratio/low_min": 4.173791239736602e-05, "clip_ratio/region_mean": 0.0014148241207294632, "epoch": 0.03406658150021292, "grad_norm": 0.12349911779165268, "learning_rate": 2e-07, "loss": 0.05, "step": 365 }, { "clip_ratio/high_max": 0.0020913054868287873, "clip_ratio/high_mean": 0.0008945612680690829, "clip_ratio/low_mean": 0.0006574874569196254, "clip_ratio/low_min": 8.562474067730363e-05, "clip_ratio/region_mean": 0.0015520487213507295, "epoch": 0.034159914600213497, "grad_norm": 0.1272130161523819, "learning_rate": 2e-07, "loss": 0.029, "step": 366 }, { "clip_ratio/high_max": 0.002231886228400981, "clip_ratio/high_mean": 0.0008604073518654332, "clip_ratio/low_mean": 0.00061496202579292, "clip_ratio/low_min": 7.722076452409965e-05, "clip_ratio/region_mean": 0.001475369390391279, "epoch": 0.03425324770021408, "grad_norm": 0.12255969643592834, "learning_rate": 2e-07, "loss": 0.0272, "step": 367 }, { "clip_ratio/high_max": 0.0022909300387254916, "clip_ratio/high_mean": 0.0009478352003498003, "clip_ratio/low_mean": 0.0006015002454660134, "clip_ratio/low_min": 2.1068046407890506e-05, "clip_ratio/region_mean": 0.0015493354767386336, "epoch": 0.03434658080021467, "grad_norm": 0.11591565608978271, "learning_rate": 2e-07, "loss": -0.0231, "step": 368 }, { "clip_ratio/high_max": 0.001848685697041219, "clip_ratio/high_mean": 0.0006901904180267593, "clip_ratio/low_mean": 0.0006117131761129713, "clip_ratio/low_min": 2.7364272682461888e-05, "clip_ratio/region_mean": 0.0013019035723118577, "epoch": 0.03443991390021525, "grad_norm": 0.13268209993839264, "learning_rate": 2e-07, "loss": 0.0548, "step": 369 }, { "clip_ratio/high_max": 0.0018065693584503606, "clip_ratio/high_mean": 0.0007710626978223445, "clip_ratio/low_mean": 0.0006208758277352899, "clip_ratio/low_min": 4.0096635530062485e-05, "clip_ratio/region_mean": 0.0013919385346525814, "epoch": 0.03453324700021583, "grad_norm": 0.13073807954788208, "learning_rate": 2e-07, "loss": 0.0264, "step": 370 }, { "clip_ratio/high_max": 0.0020543116224871483, "clip_ratio/high_mean": 0.000831001202641346, "clip_ratio/low_mean": 0.0006559370012837462, "clip_ratio/low_min": 0.00013855951692676172, "clip_ratio/region_mean": 0.0014869382248434704, "epoch": 0.034626580100216416, "grad_norm": 0.12477888911962509, "learning_rate": 2e-07, "loss": 0.016, "step": 371 }, { "clip_ratio/high_max": 0.0019062135470448993, "clip_ratio/high_mean": 0.0007313645546673797, "clip_ratio/low_mean": 0.0007595551542181056, "clip_ratio/low_min": 4.2784248762473e-05, "clip_ratio/region_mean": 0.0014909196688677184, "epoch": 0.034719913200217, "grad_norm": 0.137606680393219, "learning_rate": 2e-07, "loss": 0.0851, "step": 372 }, { "clip_ratio/high_max": 0.0021088054418214597, "clip_ratio/high_mean": 0.0007991455604496878, "clip_ratio/low_mean": 0.0006149913479021052, "clip_ratio/low_min": 4.018994059151737e-05, "clip_ratio/region_mean": 0.0014141369138087612, "epoch": 0.034813246300217586, "grad_norm": 0.14011959731578827, "learning_rate": 2e-07, "loss": 0.0172, "step": 373 }, { "clip_ratio/high_max": 0.0023670054724789225, "clip_ratio/high_mean": 0.0008459534492430976, "clip_ratio/low_mean": 0.0007747523177386029, "clip_ratio/low_min": 5.465014874062035e-05, "clip_ratio/region_mean": 0.0016207057778956369, "epoch": 0.034906579400218164, "grad_norm": 0.13037236034870148, "learning_rate": 2e-07, "loss": 0.086, "step": 374 }, { "clip_ratio/high_max": 0.001871685693913605, "clip_ratio/high_mean": 0.0008009878729353659, "clip_ratio/low_mean": 0.0006968768784645363, "clip_ratio/low_min": 5.2148810937069356e-05, "clip_ratio/region_mean": 0.0014978647741372697, "epoch": 0.03499991250021875, "grad_norm": 0.1299564242362976, "learning_rate": 2e-07, "loss": 0.0311, "step": 375 }, { "clip_ratio/high_max": 0.0021227499019005336, "clip_ratio/high_mean": 0.0007546021952293813, "clip_ratio/low_mean": 0.0006144433600638877, "clip_ratio/low_min": 1.6617921573924832e-05, "clip_ratio/region_mean": 0.0013690455634787213, "epoch": 0.035093245600219335, "grad_norm": 0.11728532612323761, "learning_rate": 2e-07, "loss": 0.0031, "step": 376 }, { "clip_ratio/high_max": 0.002051748873782344, "clip_ratio/high_mean": 0.0008261672301159706, "clip_ratio/low_mean": 0.0007225545632536523, "clip_ratio/low_min": 4.1859238081087824e-05, "clip_ratio/region_mean": 0.0015487217824556865, "epoch": 0.03518657870021991, "grad_norm": 0.12600870430469513, "learning_rate": 2e-07, "loss": 0.0312, "step": 377 }, { "clip_ratio/high_max": 0.00213628694837098, "clip_ratio/high_mean": 0.0008693533509358531, "clip_ratio/low_mean": 0.0005532468549063196, "clip_ratio/low_min": 3.526476893966901e-05, "clip_ratio/region_mean": 0.0014226001985662151, "epoch": 0.0352799118002205, "grad_norm": 0.12610547244548798, "learning_rate": 2e-07, "loss": 0.0139, "step": 378 }, { "clip_ratio/high_max": 0.0018962830072268844, "clip_ratio/high_mean": 0.0007201219759735977, "clip_ratio/low_mean": 0.0007967113342601806, "clip_ratio/low_min": 4.1063118260353804e-05, "clip_ratio/region_mean": 0.0015168333120527677, "epoch": 0.03537324490022108, "grad_norm": 0.1254897266626358, "learning_rate": 2e-07, "loss": 0.0975, "step": 379 }, { "clip_ratio/high_max": 0.0017949985558516346, "clip_ratio/high_mean": 0.0007229140628624009, "clip_ratio/low_mean": 0.0006714106793879182, "clip_ratio/low_min": 5.0748721150739584e-05, "clip_ratio/region_mean": 0.0013943247358838562, "epoch": 0.03546657800022167, "grad_norm": 0.12176269292831421, "learning_rate": 2e-07, "loss": 0.0224, "step": 380 }, { "clip_ratio/high_max": 0.0017871931595436763, "clip_ratio/high_mean": 0.000768938456531032, "clip_ratio/low_mean": 0.0006647947375313379, "clip_ratio/low_min": 2.8913120331708342e-05, "clip_ratio/region_mean": 0.0014337332249851897, "epoch": 0.03555991110022225, "grad_norm": 0.12029475718736649, "learning_rate": 2e-07, "loss": 0.0625, "step": 381 }, { "clip_ratio/high_max": 0.001985241331567522, "clip_ratio/high_mean": 0.0007566600397694856, "clip_ratio/low_mean": 0.0007062429140205495, "clip_ratio/low_min": 7.911537795735057e-05, "clip_ratio/region_mean": 0.0014629029283241834, "epoch": 0.03565324420022283, "grad_norm": 0.13246503472328186, "learning_rate": 2e-07, "loss": 0.0402, "step": 382 }, { "clip_ratio/high_max": 0.0020245888954377733, "clip_ratio/high_mean": 0.0008176612100214697, "clip_ratio/low_mean": 0.0006221003422979265, "clip_ratio/low_min": 3.41115874107345e-05, "clip_ratio/region_mean": 0.0014397615486814175, "epoch": 0.03574657730022342, "grad_norm": 0.13743485510349274, "learning_rate": 2e-07, "loss": 0.0211, "step": 383 }, { "clip_ratio/high_max": 0.001962187910976354, "clip_ratio/high_mean": 0.0007629390056536067, "clip_ratio/low_mean": 0.0005997011248837225, "clip_ratio/low_min": 2.7695125027094036e-05, "clip_ratio/region_mean": 0.001362640134175308, "epoch": 0.035839910400224, "grad_norm": 0.11758792400360107, "learning_rate": 2e-07, "loss": 0.0215, "step": 384 }, { "clip_ratio/high_max": 0.0012347276169748511, "clip_ratio/high_mean": 0.0005384833093557972, "clip_ratio/low_mean": 0.0006538678735523717, "clip_ratio/low_min": 3.101048423559405e-05, "clip_ratio/region_mean": 0.001192351228382904, "completions/clipped_ratio": 0.017011369977678603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 638.9855346679688, "completions/mean_terminated_length": 579.1593017578125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.03593324350022458, "grad_norm": 0.11270938813686371, "learning_rate": 2e-07, "loss": 0.0941, "num_tokens": 329389926.0, "reward": 0.5735124945640564, "reward_std": 0.1990089863538742, "rewards/simpleverify_reward/mean": 0.5735124945640564, "rewards/simpleverify_reward/std": 0.49456849694252014, "step": 385 }, { "clip_ratio/high_max": 0.0017514900355308782, "clip_ratio/high_mean": 0.0007251979614011361, "clip_ratio/low_mean": 0.0005968792029307224, "clip_ratio/low_min": 3.261834626755444e-05, "clip_ratio/region_mean": 0.0013220772161730565, "epoch": 0.036026576600225166, "grad_norm": 0.12380381673574448, "learning_rate": 2e-07, "loss": 0.0412, "step": 386 }, { "clip_ratio/high_max": 0.002276794650242664, "clip_ratio/high_mean": 0.0009244169214071007, "clip_ratio/low_mean": 0.00047346900919365, "clip_ratio/low_min": 1.272394092666218e-05, "clip_ratio/region_mean": 0.0013978859351482242, "epoch": 0.03611990970022575, "grad_norm": 0.12662142515182495, "learning_rate": 2e-07, "loss": -0.0081, "step": 387 }, { "clip_ratio/high_max": 0.001588627810633625, "clip_ratio/high_mean": 0.000698016050591832, "clip_ratio/low_mean": 0.0005534559049920063, "clip_ratio/low_min": 9.988600959331961e-05, "clip_ratio/region_mean": 0.001251471916475566, "epoch": 0.03621324280022633, "grad_norm": 0.12301915884017944, "learning_rate": 2e-07, "loss": 0.0302, "step": 388 }, { "clip_ratio/high_max": 0.001849981887062313, "clip_ratio/high_mean": 0.0006917185710335616, "clip_ratio/low_mean": 0.0005260355801510741, "clip_ratio/low_min": 5.9715281167882495e-05, "clip_ratio/region_mean": 0.0012177541539131198, "epoch": 0.036306575900226914, "grad_norm": 0.11322467029094696, "learning_rate": 2e-07, "loss": 0.0204, "step": 389 }, { "clip_ratio/high_max": 0.00219392205690383, "clip_ratio/high_mean": 0.0008677933637954993, "clip_ratio/low_mean": 0.0005832514461872051, "clip_ratio/low_min": 1.4460897546086926e-05, "clip_ratio/region_mean": 0.0014510447945212945, "epoch": 0.0363999090002275, "grad_norm": 0.11516296118497849, "learning_rate": 2e-07, "loss": 0.0073, "step": 390 }, { "clip_ratio/high_max": 0.0017264818234252743, "clip_ratio/high_mean": 0.0007142381400626618, "clip_ratio/low_mean": 0.0006461076209234307, "clip_ratio/low_min": 5.025524296797812e-05, "clip_ratio/region_mean": 0.0013603457882709336, "epoch": 0.036493242100228085, "grad_norm": 0.1377992182970047, "learning_rate": 2e-07, "loss": 0.0675, "step": 391 }, { "clip_ratio/high_max": 0.002028475490078563, "clip_ratio/high_mean": 0.0007738176045677392, "clip_ratio/low_mean": 0.0005608174833469093, "clip_ratio/low_min": 3.70880579794175e-05, "clip_ratio/region_mean": 0.001334635104285553, "epoch": 0.03658657520022866, "grad_norm": 0.11884529888629913, "learning_rate": 2e-07, "loss": 0.047, "step": 392 }, { "clip_ratio/high_max": 0.0017152601503767073, "clip_ratio/high_mean": 0.0007617962055519456, "clip_ratio/low_mean": 0.000550656874111155, "clip_ratio/low_min": 1.617773887119256e-05, "clip_ratio/region_mean": 0.001312453063292196, "epoch": 0.03667990830022925, "grad_norm": 0.12086061388254166, "learning_rate": 2e-07, "loss": 0.0209, "step": 393 }, { "clip_ratio/high_max": 0.0020684396877186373, "clip_ratio/high_mean": 0.0007977561308507575, "clip_ratio/low_mean": 0.0005664418085871148, "clip_ratio/low_min": 1.528864959254861e-05, "clip_ratio/region_mean": 0.0013641979458043352, "epoch": 0.036773241400229834, "grad_norm": 0.11817000061273575, "learning_rate": 2e-07, "loss": -0.0086, "step": 394 }, { "clip_ratio/high_max": 0.002025673231401015, "clip_ratio/high_mean": 0.0008083892971626483, "clip_ratio/low_mean": 0.0005080500877738814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013164394185878336, "epoch": 0.03686657450023042, "grad_norm": 0.11603293567895889, "learning_rate": 2e-07, "loss": 0.0043, "step": 395 }, { "clip_ratio/high_max": 0.0019589182011259254, "clip_ratio/high_mean": 0.000726783418940613, "clip_ratio/low_mean": 0.0006322262561297975, "clip_ratio/low_min": 3.554187878762605e-05, "clip_ratio/region_mean": 0.0013590096750704106, "epoch": 0.036959907600231, "grad_norm": 0.12061294913291931, "learning_rate": 2e-07, "loss": 0.0534, "step": 396 }, { "clip_ratio/high_max": 0.0017086494881368708, "clip_ratio/high_mean": 0.0006746254966856213, "clip_ratio/low_mean": 0.0006281117020989768, "clip_ratio/low_min": 6.663306521659251e-05, "clip_ratio/region_mean": 0.0013027372006035876, "epoch": 0.03705324070023158, "grad_norm": 0.12303686141967773, "learning_rate": 2e-07, "loss": 0.0367, "step": 397 }, { "clip_ratio/high_max": 0.0017040939419530332, "clip_ratio/high_mean": 0.000678899274134892, "clip_ratio/low_mean": 0.0005510039609362138, "clip_ratio/low_min": 2.4588600354036316e-05, "clip_ratio/region_mean": 0.0012299032059672754, "epoch": 0.03714657380023217, "grad_norm": 0.10987848788499832, "learning_rate": 2e-07, "loss": 0.0317, "step": 398 }, { "clip_ratio/high_max": 0.0016458437348774169, "clip_ratio/high_mean": 0.0007616901166329626, "clip_ratio/low_mean": 0.0005956320401310222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013573221021943027, "epoch": 0.03723990690023275, "grad_norm": 0.11941926926374435, "learning_rate": 2e-07, "loss": 0.0288, "step": 399 }, { "clip_ratio/high_max": 0.0015831498203624506, "clip_ratio/high_mean": 0.0006394210086000385, "clip_ratio/low_mean": 0.0004605914668900368, "clip_ratio/low_min": 1.3551604752137791e-05, "clip_ratio/region_mean": 0.001100012468668865, "epoch": 0.03733324000023333, "grad_norm": 0.12347234785556793, "learning_rate": 2e-07, "loss": 0.0362, "step": 400 }, { "clip_ratio/high_max": 0.0018264578720845748, "clip_ratio/high_mean": 0.0008253500200225972, "clip_ratio/low_mean": 0.0006199850995471934, "clip_ratio/low_min": 8.070744388533058e-05, "clip_ratio/region_mean": 0.0014453351104748435, "epoch": 0.037426573100233916, "grad_norm": 0.1205783411860466, "learning_rate": 2e-07, "loss": 0.0218, "step": 401 }, { "clip_ratio/high_max": 0.0017762559036782477, "clip_ratio/high_mean": 0.0006683251840513549, "clip_ratio/low_mean": 0.0006166025177662959, "clip_ratio/low_min": 2.9597363209177274e-05, "clip_ratio/region_mean": 0.0012849276772612939, "epoch": 0.0375199062002345, "grad_norm": 0.13441555202007294, "learning_rate": 2e-07, "loss": 0.0738, "step": 402 }, { "clip_ratio/high_max": 0.0017581367392267566, "clip_ratio/high_mean": 0.0006744673464709194, "clip_ratio/low_mean": 0.0006145869174360996, "clip_ratio/low_min": 5.342123768059537e-05, "clip_ratio/region_mean": 0.0012890542129753157, "epoch": 0.03761323930023508, "grad_norm": 0.1232827752828598, "learning_rate": 2e-07, "loss": 0.0786, "step": 403 }, { "clip_ratio/high_max": 0.0015349983041232917, "clip_ratio/high_mean": 0.0006732147876391537, "clip_ratio/low_mean": 0.0005914715293329209, "clip_ratio/low_min": 6.110829417593777e-05, "clip_ratio/region_mean": 0.0012646863251575269, "epoch": 0.037706572400235665, "grad_norm": 0.11538461595773697, "learning_rate": 2e-07, "loss": 0.0367, "step": 404 }, { "clip_ratio/high_max": 0.002222040951892268, "clip_ratio/high_mean": 0.0007868742013670271, "clip_ratio/low_mean": 0.0005668545436492423, "clip_ratio/low_min": 1.2475049516069703e-05, "clip_ratio/region_mean": 0.0013537287195504177, "epoch": 0.03779990550023625, "grad_norm": 0.1239287257194519, "learning_rate": 2e-07, "loss": 0.0066, "step": 405 }, { "clip_ratio/high_max": 0.0016204029780055862, "clip_ratio/high_mean": 0.0006828670047980268, "clip_ratio/low_mean": 0.0006186238288137247, "clip_ratio/low_min": 0.00010112248673976865, "clip_ratio/region_mean": 0.001301490847254172, "epoch": 0.037893238600236835, "grad_norm": 0.12326519936323166, "learning_rate": 2e-07, "loss": 0.0605, "step": 406 }, { "clip_ratio/high_max": 0.0019510324564180337, "clip_ratio/high_mean": 0.0007658493450435344, "clip_ratio/low_mean": 0.0005099280215290491, "clip_ratio/low_min": 3.564452072168933e-05, "clip_ratio/region_mean": 0.0012757773693010677, "epoch": 0.03798657170023741, "grad_norm": 0.12748096883296967, "learning_rate": 2e-07, "loss": -0.0211, "step": 407 }, { "clip_ratio/high_max": 0.0020628468482755125, "clip_ratio/high_mean": 0.0008341619522980182, "clip_ratio/low_mean": 0.0005715469487768132, "clip_ratio/low_min": 3.7850513763260096e-05, "clip_ratio/region_mean": 0.001405708899255842, "epoch": 0.038079904800238, "grad_norm": 0.12838807702064514, "learning_rate": 2e-07, "loss": 0.0197, "step": 408 }, { "clip_ratio/high_max": 0.0018263089768879581, "clip_ratio/high_mean": 0.0007088765778462403, "clip_ratio/low_mean": 0.0005474906483868835, "clip_ratio/low_min": 6.96456436344306e-06, "clip_ratio/region_mean": 0.0012563672098622192, "epoch": 0.038173237900238584, "grad_norm": 0.12237639725208282, "learning_rate": 2e-07, "loss": 0.0336, "step": 409 }, { "clip_ratio/high_max": 0.0021406182859209366, "clip_ratio/high_mean": 0.0008113982439681422, "clip_ratio/low_mean": 0.0004945800983477966, "clip_ratio/low_min": 2.977947679028148e-05, "clip_ratio/region_mean": 0.0013059783268545289, "epoch": 0.03826657100023917, "grad_norm": 0.13184009492397308, "learning_rate": 2e-07, "loss": 0.0249, "step": 410 }, { "clip_ratio/high_max": 0.0018464399399817921, "clip_ratio/high_mean": 0.0008035933788050897, "clip_ratio/low_mean": 0.0006142656557130977, "clip_ratio/low_min": 3.6554336475091986e-05, "clip_ratio/region_mean": 0.0014178590608935338, "epoch": 0.03835990410023975, "grad_norm": 0.12154219299554825, "learning_rate": 2e-07, "loss": 0.0115, "step": 411 }, { "clip_ratio/high_max": 0.0021857107130927034, "clip_ratio/high_mean": 0.0008745317572902422, "clip_ratio/low_mean": 0.0004963007122569252, "clip_ratio/low_min": 4.509063728619367e-05, "clip_ratio/region_mean": 0.001370832535030786, "epoch": 0.03845323720024033, "grad_norm": 0.12859344482421875, "learning_rate": 2e-07, "loss": 0.0282, "step": 412 }, { "clip_ratio/high_max": 0.0017857056700449903, "clip_ratio/high_mean": 0.0006907888509886106, "clip_ratio/low_mean": 0.0005186444832361303, "clip_ratio/low_min": 2.7596118343353737e-05, "clip_ratio/region_mean": 0.0012094333214918151, "epoch": 0.03854657030024092, "grad_norm": 0.11687982827425003, "learning_rate": 2e-07, "loss": 0.0074, "step": 413 }, { "clip_ratio/high_max": 0.0015598892750858795, "clip_ratio/high_mean": 0.0006392705681719235, "clip_ratio/low_mean": 0.0005820652531838277, "clip_ratio/low_min": 2.1591034055745695e-05, "clip_ratio/region_mean": 0.0012213358168082777, "epoch": 0.038639903400241496, "grad_norm": 0.11757929623126984, "learning_rate": 2e-07, "loss": 0.0331, "step": 414 }, { "clip_ratio/high_max": 0.0019532481601345353, "clip_ratio/high_mean": 0.0007526010813307948, "clip_ratio/low_mean": 0.000574838009015366, "clip_ratio/low_min": 5.15941374032991e-05, "clip_ratio/region_mean": 0.001327439080341719, "epoch": 0.03873323650024208, "grad_norm": 0.118524469435215, "learning_rate": 2e-07, "loss": -0.0027, "step": 415 }, { "clip_ratio/high_max": 0.002082292288832832, "clip_ratio/high_mean": 0.0007906272676336812, "clip_ratio/low_mean": 0.0005697506198885094, "clip_ratio/low_min": 7.656498382857535e-06, "clip_ratio/region_mean": 0.001360377878881991, "epoch": 0.038826569600242666, "grad_norm": 0.12712140381336212, "learning_rate": 2e-07, "loss": 0.0349, "step": 416 }, { "clip_ratio/high_max": 0.0016072593753051478, "clip_ratio/high_mean": 0.0006959099464438623, "clip_ratio/low_mean": 0.0006330057876766659, "clip_ratio/low_min": 3.57007274942589e-05, "clip_ratio/region_mean": 0.0013289156813698355, "epoch": 0.03891990270024325, "grad_norm": 0.10762687772512436, "learning_rate": 2e-07, "loss": 0.0147, "step": 417 }, { "clip_ratio/high_max": 0.0018354561761952937, "clip_ratio/high_mean": 0.0007133544168027584, "clip_ratio/low_mean": 0.0005252299306448549, "clip_ratio/low_min": 3.3713509765220806e-05, "clip_ratio/region_mean": 0.0012385843474476133, "epoch": 0.03901323580024383, "grad_norm": 0.10796509683132172, "learning_rate": 2e-07, "loss": 0.0242, "step": 418 }, { "clip_ratio/high_max": 0.0017270331918552984, "clip_ratio/high_mean": 0.000682954005242209, "clip_ratio/low_mean": 0.0006169766293169232, "clip_ratio/low_min": 4.547367825580295e-05, "clip_ratio/region_mean": 0.001299930638197111, "epoch": 0.039106568900244415, "grad_norm": 0.11778812110424042, "learning_rate": 2e-07, "loss": 0.0378, "step": 419 }, { "clip_ratio/high_max": 0.0015842504144529812, "clip_ratio/high_mean": 0.000643890118226409, "clip_ratio/low_mean": 0.0005384430805861484, "clip_ratio/low_min": 3.318064227642026e-05, "clip_ratio/region_mean": 0.001182333187898621, "epoch": 0.039199902000245, "grad_norm": 0.11454786360263824, "learning_rate": 2e-07, "loss": 0.0098, "step": 420 }, { "clip_ratio/high_max": 0.0017363987280987203, "clip_ratio/high_mean": 0.0007186073762568412, "clip_ratio/low_mean": 0.0005361959683796158, "clip_ratio/low_min": 2.3075503122527152e-05, "clip_ratio/region_mean": 0.0012548033337225206, "epoch": 0.039293235100245585, "grad_norm": 0.14187908172607422, "learning_rate": 2e-07, "loss": 0.0184, "step": 421 }, { "clip_ratio/high_max": 0.0019191528917872347, "clip_ratio/high_mean": 0.0007591036719531985, "clip_ratio/low_mean": 0.0005500177858266397, "clip_ratio/low_min": 5.3399327043734957e-05, "clip_ratio/region_mean": 0.0013091214386804495, "epoch": 0.039386568200246164, "grad_norm": 0.11690158396959305, "learning_rate": 2e-07, "loss": 0.0132, "step": 422 }, { "clip_ratio/high_max": 0.001988195777812507, "clip_ratio/high_mean": 0.0008226727040892001, "clip_ratio/low_mean": 0.0005774915989604779, "clip_ratio/low_min": 1.856835569924442e-05, "clip_ratio/region_mean": 0.0014001643066876568, "epoch": 0.03947990130024675, "grad_norm": 0.11577083170413971, "learning_rate": 2e-07, "loss": 0.0055, "step": 423 }, { "clip_ratio/high_max": 0.0016580955634708516, "clip_ratio/high_mean": 0.0006443700831368915, "clip_ratio/low_mean": 0.0006765044727217173, "clip_ratio/low_min": 7.147955420805374e-05, "clip_ratio/region_mean": 0.0013208745804149657, "epoch": 0.039573234400247334, "grad_norm": 0.12178140133619308, "learning_rate": 2e-07, "loss": 0.1037, "step": 424 }, { "clip_ratio/high_max": 0.0018633029430930037, "clip_ratio/high_mean": 0.0006890629301778972, "clip_ratio/low_mean": 0.0005968461646261858, "clip_ratio/low_min": 5.5288212479354115e-05, "clip_ratio/region_mean": 0.0012859091075370088, "epoch": 0.03966656750024792, "grad_norm": 0.11770538240671158, "learning_rate": 2e-07, "loss": 0.055, "step": 425 }, { "clip_ratio/high_max": 0.00172069984182599, "clip_ratio/high_mean": 0.0007586231167806545, "clip_ratio/low_mean": 0.0005650898501698975, "clip_ratio/low_min": 4.463002551347017e-05, "clip_ratio/region_mean": 0.001323712982411962, "epoch": 0.0397599006002485, "grad_norm": 0.1264885812997818, "learning_rate": 2e-07, "loss": 0.0522, "step": 426 }, { "clip_ratio/high_max": 0.0018600328730826732, "clip_ratio/high_mean": 0.0007414262381644221, "clip_ratio/low_mean": 0.0006611468370465445, "clip_ratio/low_min": 5.880843582417583e-05, "clip_ratio/region_mean": 0.0014025730779394507, "epoch": 0.03985323370024908, "grad_norm": 0.11060141026973724, "learning_rate": 2e-07, "loss": 0.0249, "step": 427 }, { "clip_ratio/high_max": 0.001553372207126813, "clip_ratio/high_mean": 0.0006348009728753823, "clip_ratio/low_mean": 0.0006448495132644894, "clip_ratio/low_min": 6.887811196065741e-06, "clip_ratio/region_mean": 0.001279650485230377, "epoch": 0.03994656680024967, "grad_norm": 0.12215851247310638, "learning_rate": 2e-07, "loss": 0.0861, "step": 428 }, { "clip_ratio/high_max": 0.001711247386992909, "clip_ratio/high_mean": 0.0007525309029006166, "clip_ratio/low_mean": 0.0005965703703623149, "clip_ratio/low_min": 0.00010234969886369072, "clip_ratio/region_mean": 0.001349101268715458, "epoch": 0.040039899900250246, "grad_norm": 0.1350710242986679, "learning_rate": 2e-07, "loss": 0.0131, "step": 429 }, { "clip_ratio/high_max": 0.00201556108368095, "clip_ratio/high_mean": 0.0008883250848157331, "clip_ratio/low_mean": 0.0005552990705837146, "clip_ratio/low_min": 2.6663274184102193e-05, "clip_ratio/region_mean": 0.0014436241508519743, "epoch": 0.04013323300025083, "grad_norm": 0.11819442361593246, "learning_rate": 2e-07, "loss": -0.0119, "step": 430 }, { "clip_ratio/high_max": 0.0019793869578279555, "clip_ratio/high_mean": 0.0008360595602425747, "clip_ratio/low_mean": 0.0005814457963424502, "clip_ratio/low_min": 4.411014833749505e-05, "clip_ratio/region_mean": 0.0014175053729559295, "epoch": 0.04022656610025142, "grad_norm": 0.1191720962524414, "learning_rate": 2e-07, "loss": -0.0257, "step": 431 }, { "clip_ratio/high_max": 0.00187696048305952, "clip_ratio/high_mean": 0.0007649337640032172, "clip_ratio/low_mean": 0.0005412335731307394, "clip_ratio/low_min": 2.3191123545984738e-05, "clip_ratio/region_mean": 0.0013061673416814301, "epoch": 0.040319899200252, "grad_norm": 0.13552697002887726, "learning_rate": 2e-07, "loss": 0.007, "step": 432 }, { "clip_ratio/high_max": 0.0018004282719630282, "clip_ratio/high_mean": 0.0007449172117048874, "clip_ratio/low_mean": 0.0005701249974663369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013150422018952668, "epoch": 0.04041323230025258, "grad_norm": 0.12261927872896194, "learning_rate": 2e-07, "loss": 0.0446, "step": 433 }, { "clip_ratio/high_max": 0.0017152965629065875, "clip_ratio/high_mean": 0.0007143949951569084, "clip_ratio/low_mean": 0.0005856275074620498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001300022555369651, "epoch": 0.040506565400253165, "grad_norm": 0.12421371042728424, "learning_rate": 2e-07, "loss": 0.0324, "step": 434 }, { "clip_ratio/high_max": 0.001589747225807514, "clip_ratio/high_mean": 0.000666312625980936, "clip_ratio/low_mean": 0.0006058425142327906, "clip_ratio/low_min": 4.493859159993008e-05, "clip_ratio/region_mean": 0.0012721551247523166, "epoch": 0.04059989850025375, "grad_norm": 0.11642207950353622, "learning_rate": 2e-07, "loss": 0.025, "step": 435 }, { "clip_ratio/high_max": 0.002010731812333688, "clip_ratio/high_mean": 0.0008159400094882585, "clip_ratio/low_mean": 0.0005646048348353361, "clip_ratio/low_min": 7.4823415161517914e-06, "clip_ratio/region_mean": 0.0013805448470520787, "epoch": 0.040693231600254336, "grad_norm": 0.12820373475551605, "learning_rate": 2e-07, "loss": -0.0004, "step": 436 }, { "clip_ratio/high_max": 0.0021220025737420656, "clip_ratio/high_mean": 0.000722414242773084, "clip_ratio/low_mean": 0.0006309702130238293, "clip_ratio/low_min": 5.7272294725407846e-05, "clip_ratio/region_mean": 0.0013533844175981358, "epoch": 0.040786564700254914, "grad_norm": 0.11796076595783234, "learning_rate": 2e-07, "loss": 0.0327, "step": 437 }, { "clip_ratio/high_max": 0.0021164892095839605, "clip_ratio/high_mean": 0.0007839306654204847, "clip_ratio/low_mean": 0.0006696440759696998, "clip_ratio/low_min": 4.658152920455905e-05, "clip_ratio/region_mean": 0.0014535747286572587, "epoch": 0.0408798978002555, "grad_norm": 0.1237604022026062, "learning_rate": 2e-07, "loss": 0.0097, "step": 438 }, { "clip_ratio/high_max": 0.0017770885024219751, "clip_ratio/high_mean": 0.000675656530802371, "clip_ratio/low_mean": 0.0006689549945804174, "clip_ratio/low_min": 6.909235435159644e-05, "clip_ratio/region_mean": 0.0013446115335682407, "epoch": 0.040973230900256084, "grad_norm": 0.12373483926057816, "learning_rate": 2e-07, "loss": 0.0653, "step": 439 }, { "clip_ratio/high_max": 0.0022465031543106306, "clip_ratio/high_mean": 0.0008405645749007817, "clip_ratio/low_mean": 0.0005956086788501125, "clip_ratio/low_min": 4.918804006592836e-05, "clip_ratio/region_mean": 0.0014361732282850426, "epoch": 0.04106656400025666, "grad_norm": 0.1197066679596901, "learning_rate": 2e-07, "loss": 0.0009, "step": 440 }, { "clip_ratio/high_max": 0.002001830449444242, "clip_ratio/high_mean": 0.0007812343865225557, "clip_ratio/low_mean": 0.0006093782812968129, "clip_ratio/low_min": 5.8935342167387716e-05, "clip_ratio/region_mean": 0.0013906126732763369, "epoch": 0.04115989710025725, "grad_norm": 0.11464092135429382, "learning_rate": 2e-07, "loss": 0.0549, "step": 441 }, { "clip_ratio/high_max": 0.001958455926796887, "clip_ratio/high_mean": 0.0008043298039410729, "clip_ratio/low_mean": 0.000591434658417711, "clip_ratio/low_min": 8.578054985264316e-05, "clip_ratio/region_mean": 0.001395764422341017, "epoch": 0.04125323020025783, "grad_norm": 0.12089533358812332, "learning_rate": 2e-07, "loss": 0.029, "step": 442 }, { "clip_ratio/high_max": 0.0018371817204752006, "clip_ratio/high_mean": 0.0007746384526399197, "clip_ratio/low_mean": 0.0006588350051970338, "clip_ratio/low_min": 6.700879293930484e-05, "clip_ratio/region_mean": 0.0014334734587464482, "epoch": 0.04134656330025842, "grad_norm": 0.15603022277355194, "learning_rate": 2e-07, "loss": 0.0327, "step": 443 }, { "clip_ratio/high_max": 0.0017309084105363581, "clip_ratio/high_mean": 0.0007445210158039117, "clip_ratio/low_mean": 0.0006156266663310817, "clip_ratio/low_min": 4.8328525735996664e-05, "clip_ratio/region_mean": 0.0013601476857729722, "epoch": 0.041439896400258996, "grad_norm": 0.12456014007329941, "learning_rate": 2e-07, "loss": 0.0264, "step": 444 }, { "clip_ratio/high_max": 0.0017974613838305231, "clip_ratio/high_mean": 0.0006904109686729498, "clip_ratio/low_mean": 0.0006568872586285579, "clip_ratio/low_min": 6.457977042373386e-05, "clip_ratio/region_mean": 0.0013472982100211084, "epoch": 0.04153322950025958, "grad_norm": 0.13176009058952332, "learning_rate": 2e-07, "loss": 0.0288, "step": 445 }, { "clip_ratio/high_max": 0.0018141597138310317, "clip_ratio/high_mean": 0.0007248572092066752, "clip_ratio/low_mean": 0.0005698010827472899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012946583119628485, "epoch": 0.04162656260026017, "grad_norm": 0.11641500145196915, "learning_rate": 2e-07, "loss": 0.0187, "step": 446 }, { "clip_ratio/high_max": 0.0018931381491711363, "clip_ratio/high_mean": 0.0007787392441969132, "clip_ratio/low_mean": 0.0006778724127798341, "clip_ratio/low_min": 2.065205444523599e-05, "clip_ratio/region_mean": 0.0014566116660716943, "epoch": 0.04171989570026075, "grad_norm": 0.12853413820266724, "learning_rate": 2e-07, "loss": 0.0346, "step": 447 }, { "clip_ratio/high_max": 0.0016353964056179393, "clip_ratio/high_mean": 0.0007090633152984083, "clip_ratio/low_mean": 0.0006230837061593775, "clip_ratio/low_min": 5.759231135016307e-05, "clip_ratio/region_mean": 0.001332147036009701, "epoch": 0.04181322880026133, "grad_norm": 0.12481112778186798, "learning_rate": 2e-07, "loss": 0.0218, "step": 448 }, { "clip_ratio/high_max": 0.0019147114580846392, "clip_ratio/high_mean": 0.0007151903282647254, "clip_ratio/low_mean": 0.0006343818236018706, "clip_ratio/low_min": 1.3037129974691197e-05, "clip_ratio/region_mean": 0.0013495721541403327, "epoch": 0.041906561900261916, "grad_norm": 0.13248465955257416, "learning_rate": 2e-07, "loss": 0.0446, "step": 449 }, { "clip_ratio/high_max": 0.0018984083799296059, "clip_ratio/high_mean": 0.0008438753120572073, "clip_ratio/low_mean": 0.0006261078033276135, "clip_ratio/low_min": 5.055108886153903e-05, "clip_ratio/region_mean": 0.0014699831190227997, "epoch": 0.0419998950002625, "grad_norm": 0.12904009222984314, "learning_rate": 2e-07, "loss": 0.0165, "step": 450 }, { "clip_ratio/high_max": 0.002016342041315511, "clip_ratio/high_mean": 0.0008539397804270266, "clip_ratio/low_mean": 0.0005726678100472782, "clip_ratio/low_min": 1.2235708709340543e-05, "clip_ratio/region_mean": 0.001426607632311061, "epoch": 0.042093228100263086, "grad_norm": 0.13066567480564117, "learning_rate": 2e-07, "loss": 0.062, "step": 451 }, { "clip_ratio/high_max": 0.001846640014264267, "clip_ratio/high_mean": 0.0007470063410437433, "clip_ratio/low_mean": 0.0005799997143185465, "clip_ratio/low_min": 1.40860938699916e-05, "clip_ratio/region_mean": 0.001327006033534417, "epoch": 0.042186561200263664, "grad_norm": 0.19789563119411469, "learning_rate": 2e-07, "loss": 0.0167, "step": 452 }, { "clip_ratio/high_max": 0.0017684752237983048, "clip_ratio/high_mean": 0.0006986524131207261, "clip_ratio/low_mean": 0.0005649905106110964, "clip_ratio/low_min": 1.4070238648855593e-05, "clip_ratio/region_mean": 0.0012636429491976742, "epoch": 0.04227989430026425, "grad_norm": 0.12359024584293365, "learning_rate": 2e-07, "loss": 0.0069, "step": 453 }, { "clip_ratio/high_max": 0.001910876697365893, "clip_ratio/high_mean": 0.0007704495492362184, "clip_ratio/low_mean": 0.0005897570154047571, "clip_ratio/low_min": 5.442161591417971e-05, "clip_ratio/region_mean": 0.001360206566459965, "epoch": 0.042373227400264835, "grad_norm": 0.12701749801635742, "learning_rate": 2e-07, "loss": 0.0216, "step": 454 }, { "clip_ratio/high_max": 0.0018397683343209792, "clip_ratio/high_mean": 0.0008351807409781031, "clip_ratio/low_mean": 0.0006753395973646548, "clip_ratio/low_min": 4.957378860126482e-05, "clip_ratio/region_mean": 0.0015105203710845672, "epoch": 0.04246656050026541, "grad_norm": 0.12606306374073029, "learning_rate": 2e-07, "loss": -0.0086, "step": 455 }, { "clip_ratio/high_max": 0.00195910360707785, "clip_ratio/high_mean": 0.0007453814087057253, "clip_ratio/low_mean": 0.0006349608993332367, "clip_ratio/low_min": 3.224107786081731e-05, "clip_ratio/region_mean": 0.001380342298944015, "epoch": 0.042559893600266, "grad_norm": 0.13856789469718933, "learning_rate": 2e-07, "loss": 0.0234, "step": 456 }, { "clip_ratio/high_max": 0.0018492992930987384, "clip_ratio/high_mean": 0.0007245297456393018, "clip_ratio/low_mean": 0.0006384830485330895, "clip_ratio/low_min": 1.9154153051204048e-05, "clip_ratio/region_mean": 0.0013630127905344125, "epoch": 0.04265322670026658, "grad_norm": 0.1254524290561676, "learning_rate": 2e-07, "loss": 0.052, "step": 457 }, { "clip_ratio/high_max": 0.001859706037066644, "clip_ratio/high_mean": 0.0007021908331807936, "clip_ratio/low_mean": 0.0005660332808474777, "clip_ratio/low_min": 2.7907029107154813e-05, "clip_ratio/region_mean": 0.0012682241467700806, "epoch": 0.04274655980026717, "grad_norm": 0.11635055392980576, "learning_rate": 2e-07, "loss": 0.0508, "step": 458 }, { "clip_ratio/high_max": 0.001968060838407837, "clip_ratio/high_mean": 0.0008553091047360795, "clip_ratio/low_mean": 0.0005853095835846034, "clip_ratio/low_min": 8.647306549391942e-05, "clip_ratio/region_mean": 0.0014406187219719868, "epoch": 0.04283989290026775, "grad_norm": 0.12051598727703094, "learning_rate": 2e-07, "loss": 0.0097, "step": 459 }, { "clip_ratio/high_max": 0.0020751801930600777, "clip_ratio/high_mean": 0.0008141606776916888, "clip_ratio/low_mean": 0.0006304424659901997, "clip_ratio/low_min": 7.979600104590645e-05, "clip_ratio/region_mean": 0.0014446031200350262, "epoch": 0.04293322600026833, "grad_norm": 0.12642183899879456, "learning_rate": 2e-07, "loss": 0.077, "step": 460 }, { "clip_ratio/high_max": 0.001788652065442875, "clip_ratio/high_mean": 0.0007618489325977862, "clip_ratio/low_mean": 0.0006473122775787488, "clip_ratio/low_min": 6.924461831658846e-05, "clip_ratio/region_mean": 0.001409161188348662, "epoch": 0.04302655910026892, "grad_norm": 0.13254590332508087, "learning_rate": 2e-07, "loss": 0.0454, "step": 461 }, { "clip_ratio/high_max": 0.0018917141096608248, "clip_ratio/high_mean": 0.0007487091352231801, "clip_ratio/low_mean": 0.0006078190617699875, "clip_ratio/low_min": 2.7232941192778526e-05, "clip_ratio/region_mean": 0.0013565281915361993, "epoch": 0.0431198922002695, "grad_norm": 0.13048475980758667, "learning_rate": 2e-07, "loss": 0.0722, "step": 462 }, { "clip_ratio/high_max": 0.001978902240807656, "clip_ratio/high_mean": 0.0008076635513134534, "clip_ratio/low_mean": 0.0006850262207080959, "clip_ratio/low_min": 3.447567814873764e-05, "clip_ratio/region_mean": 0.0014926898074918427, "epoch": 0.04321322530027008, "grad_norm": 0.12645657360553741, "learning_rate": 2e-07, "loss": 0.0475, "step": 463 }, { "clip_ratio/high_max": 0.0020373752449813765, "clip_ratio/high_mean": 0.0008047998453548644, "clip_ratio/low_mean": 0.0005875231472600717, "clip_ratio/low_min": 1.2646702089114115e-05, "clip_ratio/region_mean": 0.0013923229998908937, "epoch": 0.043306558400270666, "grad_norm": 0.11570800095796585, "learning_rate": 2e-07, "loss": 0.0399, "step": 464 }, { "clip_ratio/high_max": 0.0018375962536083534, "clip_ratio/high_mean": 0.0007714734183537075, "clip_ratio/low_mean": 0.0006205290101206629, "clip_ratio/low_min": 3.364316944498569e-05, "clip_ratio/region_mean": 0.0013920024503022432, "epoch": 0.04339989150027125, "grad_norm": 0.12506726384162903, "learning_rate": 2e-07, "loss": 0.0256, "step": 465 }, { "clip_ratio/high_max": 0.0016282618453260511, "clip_ratio/high_mean": 0.000691588458721526, "clip_ratio/low_mean": 0.0005914965258853044, "clip_ratio/low_min": 1.9778481146204285e-05, "clip_ratio/region_mean": 0.0012830849846068304, "epoch": 0.04349322460027183, "grad_norm": 0.1287083625793457, "learning_rate": 2e-07, "loss": 0.0274, "step": 466 }, { "clip_ratio/high_max": 0.0023632442971575074, "clip_ratio/high_mean": 0.0010246125129924621, "clip_ratio/low_mean": 0.0007511674048146233, "clip_ratio/low_min": 5.4476713557960466e-05, "clip_ratio/region_mean": 0.0017757799214450642, "epoch": 0.043586557700272414, "grad_norm": 0.1350337713956833, "learning_rate": 2e-07, "loss": 0.0134, "step": 467 }, { "clip_ratio/high_max": 0.001926583114254754, "clip_ratio/high_mean": 0.0008448069820587989, "clip_ratio/low_mean": 0.0006421939779102104, "clip_ratio/low_min": 5.552802758757025e-05, "clip_ratio/region_mean": 0.0014870009654259775, "epoch": 0.043679890800273, "grad_norm": 0.12916550040245056, "learning_rate": 2e-07, "loss": 0.0112, "step": 468 }, { "clip_ratio/high_max": 0.0019348768000782002, "clip_ratio/high_mean": 0.0007833419513190165, "clip_ratio/low_mean": 0.0006311959768936504, "clip_ratio/low_min": 3.614551133068744e-05, "clip_ratio/region_mean": 0.0014145379027468152, "epoch": 0.043773223900273585, "grad_norm": 0.13057181239128113, "learning_rate": 2e-07, "loss": 0.0375, "step": 469 }, { "clip_ratio/high_max": 0.0017748103164194617, "clip_ratio/high_mean": 0.0007297284410014981, "clip_ratio/low_mean": 0.0005610586258626427, "clip_ratio/low_min": 5.169616633793339e-05, "clip_ratio/region_mean": 0.0012907870914204977, "epoch": 0.04386655700027416, "grad_norm": 0.2490302473306656, "learning_rate": 2e-07, "loss": 0.0355, "step": 470 }, { "clip_ratio/high_max": 0.002000537184358109, "clip_ratio/high_mean": 0.0008319806511281058, "clip_ratio/low_mean": 0.0005876981786059332, "clip_ratio/low_min": 6.802537882322213e-05, "clip_ratio/region_mean": 0.0014196788433764596, "epoch": 0.04395989010027475, "grad_norm": 0.13387982547283173, "learning_rate": 2e-07, "loss": 0.0093, "step": 471 }, { "clip_ratio/high_max": 0.0017401247241650708, "clip_ratio/high_mean": 0.0006841897302365396, "clip_ratio/low_mean": 0.0006540201975440141, "clip_ratio/low_min": 8.64262174218311e-05, "clip_ratio/region_mean": 0.001338209902314702, "epoch": 0.044053223200275334, "grad_norm": 0.11587109416723251, "learning_rate": 2e-07, "loss": 0.0266, "step": 472 }, { "clip_ratio/high_max": 0.0018274358517373912, "clip_ratio/high_mean": 0.0007513132659369148, "clip_ratio/low_mean": 0.0006456970659201033, "clip_ratio/low_min": 8.223280292440904e-05, "clip_ratio/region_mean": 0.0013970103136671241, "epoch": 0.04414655630027592, "grad_norm": 0.14696019887924194, "learning_rate": 2e-07, "loss": 0.0438, "step": 473 }, { "clip_ratio/high_max": 0.001910316183057148, "clip_ratio/high_mean": 0.0007853966380935162, "clip_ratio/low_mean": 0.0006032477294866112, "clip_ratio/low_min": 5.201619296713034e-05, "clip_ratio/region_mean": 0.0013886443375668023, "epoch": 0.0442398894002765, "grad_norm": 0.12520448863506317, "learning_rate": 2e-07, "loss": 0.0367, "step": 474 }, { "clip_ratio/high_max": 0.001820290541218128, "clip_ratio/high_mean": 0.0007888156069384422, "clip_ratio/low_mean": 0.0006824490264989436, "clip_ratio/low_min": 7.982896750036161e-05, "clip_ratio/region_mean": 0.0014712646589032374, "epoch": 0.04433322250027708, "grad_norm": 0.12633632123470306, "learning_rate": 2e-07, "loss": 0.0236, "step": 475 }, { "clip_ratio/high_max": 0.002008898776693968, "clip_ratio/high_mean": 0.0007447817861248041, "clip_ratio/low_mean": 0.0005726673516619485, "clip_ratio/low_min": 7.860851383156842e-05, "clip_ratio/region_mean": 0.0013174491250538267, "epoch": 0.04442655560027767, "grad_norm": 0.1192314401268959, "learning_rate": 2e-07, "loss": 0.0293, "step": 476 }, { "clip_ratio/high_max": 0.0023636059631826356, "clip_ratio/high_mean": 0.0008198912291845772, "clip_ratio/low_mean": 0.0007085239740263205, "clip_ratio/low_min": 6.083705375203863e-05, "clip_ratio/region_mean": 0.0015284151850210037, "epoch": 0.04451988870027825, "grad_norm": 0.13008737564086914, "learning_rate": 2e-07, "loss": 0.0351, "step": 477 }, { "clip_ratio/high_max": 0.0018168637943745125, "clip_ratio/high_mean": 0.0007782192096783547, "clip_ratio/low_mean": 0.000637017206827295, "clip_ratio/low_min": 4.74719518024358e-05, "clip_ratio/region_mean": 0.001415236427419586, "epoch": 0.04461322180027883, "grad_norm": 0.11413247138261795, "learning_rate": 2e-07, "loss": 0.0258, "step": 478 }, { "clip_ratio/high_max": 0.0018756001045403536, "clip_ratio/high_mean": 0.0008060260552156251, "clip_ratio/low_mean": 0.0006317216266324976, "clip_ratio/low_min": 2.2152676137920935e-05, "clip_ratio/region_mean": 0.00143774762909743, "epoch": 0.044706554900279416, "grad_norm": 0.11632099747657776, "learning_rate": 2e-07, "loss": 0.0221, "step": 479 }, { "clip_ratio/high_max": 0.0020333194552222267, "clip_ratio/high_mean": 0.0008790843785391189, "clip_ratio/low_mean": 0.0005179378913453547, "clip_ratio/low_min": 8.380262443097308e-06, "clip_ratio/region_mean": 0.0013970222571515478, "epoch": 0.04479988800028, "grad_norm": 0.14135634899139404, "learning_rate": 2e-07, "loss": 0.0254, "step": 480 }, { "clip_ratio/high_max": 0.0018009403283940628, "clip_ratio/high_mean": 0.0007572115646325983, "clip_ratio/low_mean": 0.0006490078012575395, "clip_ratio/low_min": 2.005515943892533e-05, "clip_ratio/region_mean": 0.0014062193477002438, "epoch": 0.04489322110028058, "grad_norm": 0.13307270407676697, "learning_rate": 2e-07, "loss": 0.0343, "step": 481 }, { "clip_ratio/high_max": 0.0019862474837282207, "clip_ratio/high_mean": 0.0007347618593485095, "clip_ratio/low_mean": 0.0004976794734830037, "clip_ratio/low_min": 3.0547765163646545e-05, "clip_ratio/region_mean": 0.0012324413437454496, "epoch": 0.044986554200281165, "grad_norm": 0.13349904119968414, "learning_rate": 2e-07, "loss": 0.0046, "step": 482 }, { "clip_ratio/high_max": 0.001969147240743041, "clip_ratio/high_mean": 0.0008334991234733025, "clip_ratio/low_mean": 0.0006813415075157536, "clip_ratio/low_min": 2.9812025786668528e-05, "clip_ratio/region_mean": 0.0015148406018852256, "epoch": 0.04507988730028175, "grad_norm": 0.13404598832130432, "learning_rate": 2e-07, "loss": 0.0557, "step": 483 }, { "clip_ratio/high_max": 0.002128822870872682, "clip_ratio/high_mean": 0.0009082065134862205, "clip_ratio/low_mean": 0.0006188216939335689, "clip_ratio/low_min": 2.636007047840394e-05, "clip_ratio/region_mean": 0.0015270282201527152, "epoch": 0.045173220400282335, "grad_norm": 0.13563790917396545, "learning_rate": 2e-07, "loss": -0.0206, "step": 484 }, { "clip_ratio/high_max": 0.0021564526614383794, "clip_ratio/high_mean": 0.0008959547922131605, "clip_ratio/low_mean": 0.0006271830134210177, "clip_ratio/low_min": 6.0532453971973155e-05, "clip_ratio/region_mean": 0.0015231378238240723, "epoch": 0.04526655350028291, "grad_norm": 0.1279647946357727, "learning_rate": 2e-07, "loss": -0.0132, "step": 485 }, { "clip_ratio/high_max": 0.002161427135433769, "clip_ratio/high_mean": 0.0009529424387437757, "clip_ratio/low_mean": 0.0005686108588633942, "clip_ratio/low_min": 7.374899632850429e-05, "clip_ratio/region_mean": 0.0015215532948786858, "epoch": 0.0453598866002835, "grad_norm": 0.1155356913805008, "learning_rate": 2e-07, "loss": 0.005, "step": 486 }, { "clip_ratio/high_max": 0.002026515157922404, "clip_ratio/high_mean": 0.0008240645165642491, "clip_ratio/low_mean": 0.0005802326213597553, "clip_ratio/low_min": 2.812505954352673e-05, "clip_ratio/region_mean": 0.0014042971488379408, "epoch": 0.045453219700284084, "grad_norm": 0.12698383629322052, "learning_rate": 2e-07, "loss": 0.0078, "step": 487 }, { "clip_ratio/high_max": 0.001922994626511354, "clip_ratio/high_mean": 0.0007929826406325446, "clip_ratio/low_mean": 0.0006118523224358796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014048349985387176, "epoch": 0.04554655280028467, "grad_norm": 0.12232217937707901, "learning_rate": 2e-07, "loss": 0.0029, "step": 488 }, { "clip_ratio/high_max": 0.0020258961121726315, "clip_ratio/high_mean": 0.0008030399530980503, "clip_ratio/low_mean": 0.0007230992250697454, "clip_ratio/low_min": 8.407700715906685e-05, "clip_ratio/region_mean": 0.001526139210909605, "epoch": 0.04563988590028525, "grad_norm": 0.13492295145988464, "learning_rate": 2e-07, "loss": 0.068, "step": 489 }, { "clip_ratio/high_max": 0.0023484741104766726, "clip_ratio/high_mean": 0.000894650202099001, "clip_ratio/low_mean": 0.0007108736572263297, "clip_ratio/low_min": 7.80571426730603e-05, "clip_ratio/region_mean": 0.0016055238666012883, "epoch": 0.04573321900028583, "grad_norm": 0.13307319581508636, "learning_rate": 2e-07, "loss": 0.0574, "step": 490 }, { "clip_ratio/high_max": 0.0022369191101461183, "clip_ratio/high_mean": 0.0009330135417258134, "clip_ratio/low_mean": 0.0006291696881817188, "clip_ratio/low_min": 7.408109195239376e-05, "clip_ratio/region_mean": 0.0015621832353645004, "epoch": 0.04582655210028642, "grad_norm": 0.14723420143127441, "learning_rate": 2e-07, "loss": -0.009, "step": 491 }, { "clip_ratio/high_max": 0.0015179910042206757, "clip_ratio/high_mean": 0.0006145557508716593, "clip_ratio/low_mean": 0.0006580953631782904, "clip_ratio/low_min": 7.03976347722346e-05, "clip_ratio/region_mean": 0.0012726511013170239, "epoch": 0.045919885200286996, "grad_norm": 0.11829836666584015, "learning_rate": 2e-07, "loss": 0.0836, "step": 492 }, { "clip_ratio/high_max": 0.001707770748907933, "clip_ratio/high_mean": 0.0007141609257814707, "clip_ratio/low_mean": 0.0005698990962628159, "clip_ratio/low_min": 2.446541748213349e-05, "clip_ratio/region_mean": 0.001284059999306919, "epoch": 0.04601321830028758, "grad_norm": 0.11830785125494003, "learning_rate": 2e-07, "loss": 0.0356, "step": 493 }, { "clip_ratio/high_max": 0.0018666433679754846, "clip_ratio/high_mean": 0.0007922782351670321, "clip_ratio/low_mean": 0.0007456970361090498, "clip_ratio/low_min": 3.9176505197247025e-05, "clip_ratio/region_mean": 0.0015379752512671985, "epoch": 0.046106551400288166, "grad_norm": 0.12381519377231598, "learning_rate": 2e-07, "loss": 0.0311, "step": 494 }, { "clip_ratio/high_max": 0.001854005407949444, "clip_ratio/high_mean": 0.0008238007285399362, "clip_ratio/low_mean": 0.000575413012484205, "clip_ratio/low_min": 6.497345384559594e-05, "clip_ratio/region_mean": 0.0013992137064633425, "epoch": 0.04619988450028875, "grad_norm": 0.12948384881019592, "learning_rate": 2e-07, "loss": -0.0019, "step": 495 }, { "clip_ratio/high_max": 0.0020317581675044494, "clip_ratio/high_mean": 0.0007746997662252397, "clip_ratio/low_mean": 0.0006825306627433747, "clip_ratio/low_min": 4.632042328012176e-05, "clip_ratio/region_mean": 0.0014572304062312469, "epoch": 0.04629321760028933, "grad_norm": 0.13935619592666626, "learning_rate": 2e-07, "loss": 0.0482, "step": 496 }, { "clip_ratio/high_max": 0.002130854980350705, "clip_ratio/high_mean": 0.0008616955492470879, "clip_ratio/low_mean": 0.0006189533505676081, "clip_ratio/low_min": 2.6722931579570286e-05, "clip_ratio/region_mean": 0.0014806488834437914, "epoch": 0.046386550700289915, "grad_norm": 0.12275732308626175, "learning_rate": 2e-07, "loss": 0.0265, "step": 497 }, { "clip_ratio/high_max": 0.0020626810219255276, "clip_ratio/high_mean": 0.0007578172535431804, "clip_ratio/low_mean": 0.0006987713823036756, "clip_ratio/low_min": 2.6179177439189516e-05, "clip_ratio/region_mean": 0.0014565886194759514, "epoch": 0.0464798838002905, "grad_norm": 0.13679419457912445, "learning_rate": 2e-07, "loss": 0.0735, "step": 498 }, { "clip_ratio/high_max": 0.0018492375820642337, "clip_ratio/high_mean": 0.0007298536493181018, "clip_ratio/low_mean": 0.0006207284131960478, "clip_ratio/low_min": 5.88807006351999e-05, "clip_ratio/region_mean": 0.0013505820606951602, "epoch": 0.046573216900291085, "grad_norm": 0.1103619858622551, "learning_rate": 2e-07, "loss": 0.0195, "step": 499 }, { "clip_ratio/high_max": 0.0019186159843229689, "clip_ratio/high_mean": 0.0007655673234694405, "clip_ratio/low_mean": 0.0005962698664916388, "clip_ratio/low_min": 2.233069153589895e-05, "clip_ratio/region_mean": 0.0013618372031487525, "epoch": 0.046666550000291664, "grad_norm": 0.13140170276165009, "learning_rate": 2e-07, "loss": -0.0066, "step": 500 }, { "clip_ratio/high_max": 0.002101513062370941, "clip_ratio/high_mean": 0.0008340467520611128, "clip_ratio/low_mean": 0.0007634428402525373, "clip_ratio/low_min": 7.973694300744683e-05, "clip_ratio/region_mean": 0.0015974895904946607, "epoch": 0.04675988310029225, "grad_norm": 0.15203750133514404, "learning_rate": 2e-07, "loss": 0.0497, "step": 501 }, { "clip_ratio/high_max": 0.001876110654848162, "clip_ratio/high_mean": 0.0007428601875290042, "clip_ratio/low_mean": 0.0006086422536100144, "clip_ratio/low_min": 2.500362643331755e-05, "clip_ratio/region_mean": 0.0013515024475054815, "epoch": 0.046853216200292834, "grad_norm": 0.12534724175930023, "learning_rate": 2e-07, "loss": 0.0567, "step": 502 }, { "clip_ratio/high_max": 0.0019188530786777847, "clip_ratio/high_mean": 0.0008121489190671127, "clip_ratio/low_mean": 0.0007653781794942915, "clip_ratio/low_min": 3.324529461679049e-05, "clip_ratio/region_mean": 0.0015775270876474679, "epoch": 0.04694654930029342, "grad_norm": 0.13307712972164154, "learning_rate": 2e-07, "loss": 0.0409, "step": 503 }, { "clip_ratio/high_max": 0.0020770316987182014, "clip_ratio/high_mean": 0.0008291678932437208, "clip_ratio/low_mean": 0.0006928369293746073, "clip_ratio/low_min": 7.08294228388695e-05, "clip_ratio/region_mean": 0.0015220048007904552, "epoch": 0.047039882400294, "grad_norm": 0.14268989861011505, "learning_rate": 2e-07, "loss": 0.0508, "step": 504 }, { "clip_ratio/high_max": 0.001973775557416957, "clip_ratio/high_mean": 0.0007764970814605476, "clip_ratio/low_mean": 0.0005938445865467656, "clip_ratio/low_min": 1.6344141840818338e-05, "clip_ratio/region_mean": 0.0013703416734642815, "epoch": 0.04713321550029458, "grad_norm": 0.12510068714618683, "learning_rate": 2e-07, "loss": 0.0259, "step": 505 }, { "clip_ratio/high_max": 0.0017966504965443164, "clip_ratio/high_mean": 0.0007113698375178501, "clip_ratio/low_mean": 0.0006904272031533765, "clip_ratio/low_min": 3.632706921052886e-05, "clip_ratio/region_mean": 0.0014017970315762796, "epoch": 0.04722654860029517, "grad_norm": 0.12421339750289917, "learning_rate": 2e-07, "loss": 0.0028, "step": 506 }, { "clip_ratio/high_max": 0.0018278961688338313, "clip_ratio/high_mean": 0.0008187573967006756, "clip_ratio/low_mean": 0.0005875160941286595, "clip_ratio/low_min": 2.2852533220429905e-05, "clip_ratio/region_mean": 0.0014062735026527662, "epoch": 0.047319881700295746, "grad_norm": 0.13031218945980072, "learning_rate": 2e-07, "loss": 0.0302, "step": 507 }, { "clip_ratio/high_max": 0.002324550488992827, "clip_ratio/high_mean": 0.0009599067179806298, "clip_ratio/low_mean": 0.0007103462903614854, "clip_ratio/low_min": 6.337150261970237e-05, "clip_ratio/region_mean": 0.001670253011980094, "epoch": 0.04741321480029633, "grad_norm": 0.1300336718559265, "learning_rate": 2e-07, "loss": 0.0031, "step": 508 }, { "clip_ratio/high_max": 0.001830149587476626, "clip_ratio/high_mean": 0.0007993463223101571, "clip_ratio/low_mean": 0.0006755232680006884, "clip_ratio/low_min": 5.068012615083717e-05, "clip_ratio/region_mean": 0.0014748695975868031, "epoch": 0.04750654790029692, "grad_norm": 0.13169723749160767, "learning_rate": 2e-07, "loss": 0.032, "step": 509 }, { "clip_ratio/high_max": 0.002151258486264851, "clip_ratio/high_mean": 0.0009018862911034375, "clip_ratio/low_mean": 0.0007169297323343926, "clip_ratio/low_min": 7.104249743861146e-05, "clip_ratio/region_mean": 0.0016188160407182295, "epoch": 0.0475998810002975, "grad_norm": 0.13975879549980164, "learning_rate": 2e-07, "loss": 0.0441, "step": 510 }, { "clip_ratio/high_max": 0.0020038730472151656, "clip_ratio/high_mean": 0.0007748464831820456, "clip_ratio/low_mean": 0.0005797157123197394, "clip_ratio/low_min": 3.269920216553146e-05, "clip_ratio/region_mean": 0.001354562209598953, "epoch": 0.04769321410029808, "grad_norm": 0.1201063022017479, "learning_rate": 2e-07, "loss": 0.0087, "step": 511 }, { "clip_ratio/high_max": 0.0017223360810021404, "clip_ratio/high_mean": 0.0007924489473225549, "clip_ratio/low_mean": 0.0007499489820474992, "clip_ratio/low_min": 4.7501595872745384e-05, "clip_ratio/region_mean": 0.0015423979239130858, "epoch": 0.047786547200298665, "grad_norm": 0.1441613733768463, "learning_rate": 2e-07, "loss": 0.0207, "step": 512 }, { "clip_ratio/high_max": 0.0016911438033275772, "clip_ratio/high_mean": 0.0006324619535007514, "clip_ratio/low_mean": 0.000622469604422804, "clip_ratio/low_min": 4.368683403299656e-05, "clip_ratio/region_mean": 0.0012549315360956825, "completions/clipped_ratio": 0.018702915736607095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 660.1011962890625, "completions/mean_terminated_length": 594.6150512695312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.04787988030029925, "grad_norm": 0.11298026889562607, "learning_rate": 2e-07, "loss": 0.0524, "num_tokens": 416559796.0, "reward": 0.5731375813484192, "reward_std": 0.19086571037769318, "rewards/simpleverify_reward/mean": 0.5731375813484192, "rewards/simpleverify_reward/std": 0.49462413787841797, "step": 513 }, { "clip_ratio/high_max": 0.0019055615885008592, "clip_ratio/high_mean": 0.0007541301165474579, "clip_ratio/low_mean": 0.0005483079567056848, "clip_ratio/low_min": 1.864558544184547e-05, "clip_ratio/region_mean": 0.0013024380714341532, "epoch": 0.047973213400299836, "grad_norm": 0.12759138643741608, "learning_rate": 2e-07, "loss": 0.0207, "step": 514 }, { "clip_ratio/high_max": 0.0016372005666198675, "clip_ratio/high_mean": 0.000622099862084724, "clip_ratio/low_mean": 0.00048604997391521465, "clip_ratio/low_min": 1.1920656106667593e-05, "clip_ratio/region_mean": 0.0011081498632847797, "epoch": 0.048066546500300414, "grad_norm": 0.1298171728849411, "learning_rate": 2e-07, "loss": 0.0416, "step": 515 }, { "clip_ratio/high_max": 0.0017211995655088685, "clip_ratio/high_mean": 0.0007311028307412926, "clip_ratio/low_mean": 0.000658808905427577, "clip_ratio/low_min": 6.318408759398153e-05, "clip_ratio/region_mean": 0.0013899117293476593, "epoch": 0.048159879600301, "grad_norm": 0.13067112863063812, "learning_rate": 2e-07, "loss": 0.028, "step": 516 }, { "clip_ratio/high_max": 0.0015548435003438499, "clip_ratio/high_mean": 0.0006100687151047168, "clip_ratio/low_mean": 0.00044122221970610553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010512909284443595, "epoch": 0.048253212700301584, "grad_norm": 0.12154652178287506, "learning_rate": 2e-07, "loss": 0.0501, "step": 517 }, { "clip_ratio/high_max": 0.001699821645161137, "clip_ratio/high_mean": 0.000649929081191658, "clip_ratio/low_mean": 0.0006470609423558926, "clip_ratio/low_min": 8.591075311414897e-05, "clip_ratio/region_mean": 0.0012969900053576566, "epoch": 0.04834654580030216, "grad_norm": 0.12324988096952438, "learning_rate": 2e-07, "loss": 0.0241, "step": 518 }, { "clip_ratio/high_max": 0.001591528758581262, "clip_ratio/high_mean": 0.0006743817330061574, "clip_ratio/low_mean": 0.0005555367597480654, "clip_ratio/low_min": 2.5526946046738885e-05, "clip_ratio/region_mean": 0.0012299184745643288, "epoch": 0.04843987890030275, "grad_norm": 0.13422206044197083, "learning_rate": 2e-07, "loss": -0.0069, "step": 519 }, { "clip_ratio/high_max": 0.0019250004115747288, "clip_ratio/high_mean": 0.0007246539571497124, "clip_ratio/low_mean": 0.0005635598245135043, "clip_ratio/low_min": 2.576258066255832e-05, "clip_ratio/region_mean": 0.0012882137962151319, "epoch": 0.04853321200030333, "grad_norm": 0.12279489636421204, "learning_rate": 2e-07, "loss": 0.0115, "step": 520 }, { "clip_ratio/high_max": 0.0017109660293499473, "clip_ratio/high_mean": 0.0006368041376845213, "clip_ratio/low_mean": 0.0004745522910525324, "clip_ratio/low_min": 1.6133210465341108e-05, "clip_ratio/region_mean": 0.0011113564323750325, "epoch": 0.04862654510030392, "grad_norm": 0.11207243800163269, "learning_rate": 2e-07, "loss": 0.0104, "step": 521 }, { "clip_ratio/high_max": 0.0017926863001775928, "clip_ratio/high_mean": 0.0007263516181410523, "clip_ratio/low_mean": 0.000540116906449839, "clip_ratio/low_min": 3.0086114747973625e-05, "clip_ratio/region_mean": 0.0012664685127674602, "epoch": 0.048719878200304496, "grad_norm": 0.11876726895570755, "learning_rate": 2e-07, "loss": 0.0663, "step": 522 }, { "clip_ratio/high_max": 0.0017169080274470616, "clip_ratio/high_mean": 0.0006668853802693775, "clip_ratio/low_mean": 0.0006589068561879685, "clip_ratio/low_min": 0.00010109489358001156, "clip_ratio/region_mean": 0.0013257922510092612, "epoch": 0.04881321130030508, "grad_norm": 0.12012317776679993, "learning_rate": 2e-07, "loss": 0.0462, "step": 523 }, { "clip_ratio/high_max": 0.0015614154617651366, "clip_ratio/high_mean": 0.0006649482511420501, "clip_ratio/low_mean": 0.0006062602205929579, "clip_ratio/low_min": 2.778395173663739e-05, "clip_ratio/region_mean": 0.0012712084680970293, "epoch": 0.04890654440030567, "grad_norm": 0.12514448165893555, "learning_rate": 2e-07, "loss": 0.0412, "step": 524 }, { "clip_ratio/high_max": 0.0018573427332739811, "clip_ratio/high_mean": 0.0006457889085140778, "clip_ratio/low_mean": 0.0006450058335758513, "clip_ratio/low_min": 6.813176878495142e-05, "clip_ratio/region_mean": 0.0012907947530038655, "epoch": 0.04899987750030625, "grad_norm": 0.1253548413515091, "learning_rate": 2e-07, "loss": 0.0747, "step": 525 }, { "clip_ratio/high_max": 0.0014719585560669657, "clip_ratio/high_mean": 0.000570067892113002, "clip_ratio/low_mean": 0.0005554877479880815, "clip_ratio/low_min": 7.219591225293698e-06, "clip_ratio/region_mean": 0.0011255556055402849, "epoch": 0.04909321060030683, "grad_norm": 0.10819262266159058, "learning_rate": 2e-07, "loss": 0.026, "step": 526 }, { "clip_ratio/high_max": 0.0014960130320105236, "clip_ratio/high_mean": 0.0005884772908757441, "clip_ratio/low_mean": 0.0005537784272746649, "clip_ratio/low_min": 2.4256875803985167e-05, "clip_ratio/region_mean": 0.0011422557108744513, "epoch": 0.049186543700307415, "grad_norm": 0.10986540466547012, "learning_rate": 2e-07, "loss": 0.0513, "step": 527 }, { "clip_ratio/high_max": 0.001744572651659837, "clip_ratio/high_mean": 0.0006841859394626226, "clip_ratio/low_mean": 0.0005065137061137648, "clip_ratio/low_min": 1.9027771941182436e-05, "clip_ratio/region_mean": 0.0011906996624020394, "epoch": 0.049279876800308, "grad_norm": 0.11253663152456284, "learning_rate": 2e-07, "loss": 0.0474, "step": 528 }, { "clip_ratio/high_max": 0.0016298773407470435, "clip_ratio/high_mean": 0.0006067223093850771, "clip_ratio/low_mean": 0.0004987436504961806, "clip_ratio/low_min": 6.978742749197409e-05, "clip_ratio/region_mean": 0.001105465940781869, "epoch": 0.049373209900308586, "grad_norm": 0.11536478996276855, "learning_rate": 2e-07, "loss": 0.027, "step": 529 }, { "clip_ratio/high_max": 0.0018762354884529486, "clip_ratio/high_mean": 0.0006536356813739985, "clip_ratio/low_mean": 0.00045648969808098627, "clip_ratio/low_min": 1.2295888154767454e-05, "clip_ratio/region_mean": 0.0011101254021923523, "epoch": 0.049466543000309164, "grad_norm": 0.10952523350715637, "learning_rate": 2e-07, "loss": 0.0148, "step": 530 }, { "clip_ratio/high_max": 0.0016240243530774023, "clip_ratio/high_mean": 0.0005951501116214786, "clip_ratio/low_mean": 0.0005810797229059972, "clip_ratio/low_min": 2.2846100819151616e-05, "clip_ratio/region_mean": 0.0011762298381654546, "epoch": 0.04955987610030975, "grad_norm": 0.13476504385471344, "learning_rate": 2e-07, "loss": 0.0776, "step": 531 }, { "clip_ratio/high_max": 0.001935601689183386, "clip_ratio/high_mean": 0.0007952528394525871, "clip_ratio/low_mean": 0.0005942847137703211, "clip_ratio/low_min": 7.807414840499405e-05, "clip_ratio/region_mean": 0.001389537559589371, "epoch": 0.049653209200310335, "grad_norm": 0.11563518643379211, "learning_rate": 2e-07, "loss": 0.0307, "step": 532 }, { "clip_ratio/high_max": 0.002030391689913813, "clip_ratio/high_mean": 0.0007907604704087134, "clip_ratio/low_mean": 0.0005345317167666508, "clip_ratio/low_min": 2.198042784584686e-05, "clip_ratio/region_mean": 0.0013252921962703113, "epoch": 0.04974654230031091, "grad_norm": 0.10787410289049149, "learning_rate": 2e-07, "loss": 0.046, "step": 533 }, { "clip_ratio/high_max": 0.0020212958770571277, "clip_ratio/high_mean": 0.0007454073511325987, "clip_ratio/low_mean": 0.0006577837302756961, "clip_ratio/low_min": 2.8121484319854062e-05, "clip_ratio/region_mean": 0.0014031910795893054, "epoch": 0.0498398754003115, "grad_norm": 0.1376996487379074, "learning_rate": 2e-07, "loss": 0.0364, "step": 534 }, { "clip_ratio/high_max": 0.001841231744037941, "clip_ratio/high_mean": 0.0007891688310337486, "clip_ratio/low_mean": 0.0005850360666954657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013742048795393202, "epoch": 0.04993320850031208, "grad_norm": 0.12365517765283585, "learning_rate": 2e-07, "loss": 0.0242, "step": 535 }, { "clip_ratio/high_max": 0.0014932702761143446, "clip_ratio/high_mean": 0.0006284396331466269, "clip_ratio/low_mean": 0.0005191584123167559, "clip_ratio/low_min": 1.891498322947882e-05, "clip_ratio/region_mean": 0.0011475980827526655, "epoch": 0.05002654160031267, "grad_norm": 0.11342525482177734, "learning_rate": 2e-07, "loss": 0.0695, "step": 536 }, { "clip_ratio/high_max": 0.0017290584873990156, "clip_ratio/high_mean": 0.0006354221168294316, "clip_ratio/low_mean": 0.0006204359378898516, "clip_ratio/low_min": 4.611405347532127e-05, "clip_ratio/region_mean": 0.0012558580820041243, "epoch": 0.05011987470031325, "grad_norm": 0.12601055204868317, "learning_rate": 2e-07, "loss": 0.0645, "step": 537 }, { "clip_ratio/high_max": 0.0018686095827433746, "clip_ratio/high_mean": 0.0007850421243347228, "clip_ratio/low_mean": 0.0005586308434430975, "clip_ratio/low_min": 3.90221607631247e-05, "clip_ratio/region_mean": 0.0013436729859677143, "epoch": 0.05021320780031383, "grad_norm": 0.1223674863576889, "learning_rate": 2e-07, "loss": 0.0117, "step": 538 }, { "clip_ratio/high_max": 0.0019604140288720373, "clip_ratio/high_mean": 0.0008119478243315825, "clip_ratio/low_mean": 0.0005584951350101619, "clip_ratio/low_min": 5.867901927558705e-05, "clip_ratio/region_mean": 0.0013704429693461861, "epoch": 0.05030654090031442, "grad_norm": 0.11562816053628922, "learning_rate": 2e-07, "loss": 0.036, "step": 539 }, { "clip_ratio/high_max": 0.001785804211976938, "clip_ratio/high_mean": 0.0006332191069304827, "clip_ratio/low_mean": 0.0006834329633420566, "clip_ratio/low_min": 8.132418406603392e-05, "clip_ratio/region_mean": 0.0013166520657250658, "epoch": 0.050399874000315, "grad_norm": 0.12420156598091125, "learning_rate": 2e-07, "loss": 0.0722, "step": 540 }, { "clip_ratio/high_max": 0.0018473132804501802, "clip_ratio/high_mean": 0.0006577056992682628, "clip_ratio/low_mean": 0.0005769932586190407, "clip_ratio/low_min": 1.3372419743973296e-05, "clip_ratio/region_mean": 0.0012346989460638724, "epoch": 0.05049320710031558, "grad_norm": 0.12634967267513275, "learning_rate": 2e-07, "loss": 0.0417, "step": 541 }, { "clip_ratio/high_max": 0.0018358995475864504, "clip_ratio/high_mean": 0.0007257254765136167, "clip_ratio/low_mean": 0.0004934849885103176, "clip_ratio/low_min": 1.8340243059356e-05, "clip_ratio/region_mean": 0.0012192104513815138, "epoch": 0.050586540200316166, "grad_norm": 0.12871892750263214, "learning_rate": 2e-07, "loss": 0.0321, "step": 542 }, { "clip_ratio/high_max": 0.0021494842985703144, "clip_ratio/high_mean": 0.0007976802480698097, "clip_ratio/low_mean": 0.0006469649861173821, "clip_ratio/low_min": 2.588775987533154e-05, "clip_ratio/region_mean": 0.0014446452478296123, "epoch": 0.05067987330031675, "grad_norm": 0.12188766151666641, "learning_rate": 2e-07, "loss": 0.0186, "step": 543 }, { "clip_ratio/high_max": 0.0019197439614799805, "clip_ratio/high_mean": 0.0008248587164416676, "clip_ratio/low_mean": 0.0006082845939090475, "clip_ratio/low_min": 2.5058481696760282e-05, "clip_ratio/region_mean": 0.0014331432757899165, "epoch": 0.05077320640031733, "grad_norm": 0.12502016127109528, "learning_rate": 2e-07, "loss": 0.0155, "step": 544 }, { "clip_ratio/high_max": 0.0016668248135829344, "clip_ratio/high_mean": 0.0006620520434807986, "clip_ratio/low_mean": 0.0005633309820041177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001225383035489358, "epoch": 0.050866539500317914, "grad_norm": 0.12126310914754868, "learning_rate": 2e-07, "loss": 0.029, "step": 545 }, { "clip_ratio/high_max": 0.0021015980728407158, "clip_ratio/high_mean": 0.0007989244022610364, "clip_ratio/low_mean": 0.000507291098983842, "clip_ratio/low_min": 3.895943063980667e-05, "clip_ratio/region_mean": 0.0013062154794170056, "epoch": 0.0509598726003185, "grad_norm": 0.122885562479496, "learning_rate": 2e-07, "loss": -0.0154, "step": 546 }, { "clip_ratio/high_max": 0.0019345288310432807, "clip_ratio/high_mean": 0.0007570865909656277, "clip_ratio/low_mean": 0.0005317979698702402, "clip_ratio/low_min": 1.387963584420504e-05, "clip_ratio/region_mean": 0.0012888845812994987, "epoch": 0.051053205700319085, "grad_norm": 0.11875979602336884, "learning_rate": 2e-07, "loss": -0.0209, "step": 547 }, { "clip_ratio/high_max": 0.001721780694424524, "clip_ratio/high_mean": 0.0006966763148739119, "clip_ratio/low_mean": 0.0005951242510491284, "clip_ratio/low_min": 7.561992242699489e-05, "clip_ratio/region_mean": 0.0012918005922983866, "epoch": 0.05114653880031966, "grad_norm": 0.12873409688472748, "learning_rate": 2e-07, "loss": 0.014, "step": 548 }, { "clip_ratio/high_max": 0.001902618914755294, "clip_ratio/high_mean": 0.0008150464182108408, "clip_ratio/low_mean": 0.000650896815386659, "clip_ratio/low_min": 3.164745066897012e-05, "clip_ratio/region_mean": 0.0014659432272310369, "epoch": 0.05123987190032025, "grad_norm": 0.1314297765493393, "learning_rate": 2e-07, "loss": 0.0092, "step": 549 }, { "clip_ratio/high_max": 0.00191223432921106, "clip_ratio/high_mean": 0.0007700695114181144, "clip_ratio/low_mean": 0.0006386641962308204, "clip_ratio/low_min": 5.1340840400371235e-05, "clip_ratio/region_mean": 0.0014087336967349984, "epoch": 0.05133320500032083, "grad_norm": 0.13911126554012299, "learning_rate": 2e-07, "loss": 0.013, "step": 550 }, { "clip_ratio/high_max": 0.0014529107575071976, "clip_ratio/high_mean": 0.0006132005491963355, "clip_ratio/low_mean": 0.0005745126145484392, "clip_ratio/low_min": 7.918378514659707e-05, "clip_ratio/region_mean": 0.001187713183753658, "epoch": 0.05142653810032142, "grad_norm": 0.20155708491802216, "learning_rate": 2e-07, "loss": 0.0718, "step": 551 }, { "clip_ratio/high_max": 0.0018723777902778238, "clip_ratio/high_mean": 0.0007526285298808943, "clip_ratio/low_mean": 0.0005221480087129748, "clip_ratio/low_min": 5.5972776863200124e-05, "clip_ratio/region_mean": 0.0012747765467793215, "epoch": 0.051519871200322, "grad_norm": 0.11362409591674805, "learning_rate": 2e-07, "loss": 0.0351, "step": 552 }, { "clip_ratio/high_max": 0.0018042937081190757, "clip_ratio/high_mean": 0.0006608404291910119, "clip_ratio/low_mean": 0.0005771404048573459, "clip_ratio/low_min": 3.3334152249153703e-05, "clip_ratio/region_mean": 0.0012379808067635167, "epoch": 0.05161320430032258, "grad_norm": 0.10544995218515396, "learning_rate": 2e-07, "loss": 0.0311, "step": 553 }, { "clip_ratio/high_max": 0.0016912070532271173, "clip_ratio/high_mean": 0.0006509279810416047, "clip_ratio/low_mean": 0.000695123821060406, "clip_ratio/low_min": 2.047275847871788e-05, "clip_ratio/region_mean": 0.0013460518493957352, "epoch": 0.05170653740032317, "grad_norm": 0.11964976042509079, "learning_rate": 2e-07, "loss": 0.0577, "step": 554 }, { "clip_ratio/high_max": 0.0017991704298765399, "clip_ratio/high_mean": 0.0007979949150467291, "clip_ratio/low_mean": 0.0005872214151168009, "clip_ratio/low_min": 2.8045772523910273e-05, "clip_ratio/region_mean": 0.0013852163283445407, "epoch": 0.05179987050032375, "grad_norm": 0.11670426279306412, "learning_rate": 2e-07, "loss": 0.0152, "step": 555 }, { "clip_ratio/high_max": 0.001659480061789509, "clip_ratio/high_mean": 0.0007044650037641986, "clip_ratio/low_mean": 0.0005288314732752042, "clip_ratio/low_min": 7.4201589086442254e-06, "clip_ratio/region_mean": 0.0012332964761299081, "epoch": 0.05189320360032433, "grad_norm": 0.12758934497833252, "learning_rate": 2e-07, "loss": 0.0153, "step": 556 }, { "clip_ratio/high_max": 0.0020512506234808825, "clip_ratio/high_mean": 0.000795040607044939, "clip_ratio/low_mean": 0.0006537424542329973, "clip_ratio/low_min": 3.822591133939568e-05, "clip_ratio/region_mean": 0.0014487830449070316, "epoch": 0.051986536700324916, "grad_norm": 0.1316671073436737, "learning_rate": 2e-07, "loss": 0.0267, "step": 557 }, { "clip_ratio/high_max": 0.0016326837831002194, "clip_ratio/high_mean": 0.0006808873986301478, "clip_ratio/low_mean": 0.0005960292837698944, "clip_ratio/low_min": 4.069219994562445e-05, "clip_ratio/region_mean": 0.0012769166642101482, "epoch": 0.0520798698003255, "grad_norm": 0.11492060124874115, "learning_rate": 2e-07, "loss": 0.079, "step": 558 }, { "clip_ratio/high_max": 0.0017714321620587725, "clip_ratio/high_mean": 0.0007082804895617301, "clip_ratio/low_mean": 0.0005819697053084383, "clip_ratio/low_min": 6.389534155459842e-05, "clip_ratio/region_mean": 0.0012902501948701683, "epoch": 0.05217320290032608, "grad_norm": 0.11201989650726318, "learning_rate": 2e-07, "loss": 0.0321, "step": 559 }, { "clip_ratio/high_max": 0.0017779856352717616, "clip_ratio/high_mean": 0.0007113968331395881, "clip_ratio/low_mean": 0.0005475915659189923, "clip_ratio/low_min": 6.314742131507955e-05, "clip_ratio/region_mean": 0.0012589883735927287, "epoch": 0.052266536000326665, "grad_norm": 0.1309257596731186, "learning_rate": 2e-07, "loss": 0.0169, "step": 560 }, { "clip_ratio/high_max": 0.001847168525273446, "clip_ratio/high_mean": 0.0007185839604062494, "clip_ratio/low_mean": 0.000633893912890926, "clip_ratio/low_min": 1.582679215061944e-05, "clip_ratio/region_mean": 0.0013524778696591966, "epoch": 0.05235986910032725, "grad_norm": 0.13159914314746857, "learning_rate": 2e-07, "loss": 0.0626, "step": 561 }, { "clip_ratio/high_max": 0.0018328269470657688, "clip_ratio/high_mean": 0.0007086717923812103, "clip_ratio/low_mean": 0.0005212823052715976, "clip_ratio/low_min": 4.080983126186766e-05, "clip_ratio/region_mean": 0.0012299541049287654, "epoch": 0.052453202200327835, "grad_norm": 0.1301041692495346, "learning_rate": 2e-07, "loss": 0.0029, "step": 562 }, { "clip_ratio/high_max": 0.001789040383300744, "clip_ratio/high_mean": 0.0007790057497913949, "clip_ratio/low_mean": 0.0006246452521736501, "clip_ratio/low_min": 2.373538245592499e-05, "clip_ratio/region_mean": 0.0014036510074220132, "epoch": 0.05254653530032841, "grad_norm": 0.12613432109355927, "learning_rate": 2e-07, "loss": 0.0164, "step": 563 }, { "clip_ratio/high_max": 0.0018007719663728494, "clip_ratio/high_mean": 0.0007018050800979836, "clip_ratio/low_mean": 0.0006099563579482492, "clip_ratio/low_min": 2.6959542992699426e-05, "clip_ratio/region_mean": 0.0013117614398652222, "epoch": 0.052639868400329, "grad_norm": 0.11805761605501175, "learning_rate": 2e-07, "loss": 0.0537, "step": 564 }, { "clip_ratio/high_max": 0.0016851529762789141, "clip_ratio/high_mean": 0.0006174919744807994, "clip_ratio/low_mean": 0.0004897001808785717, "clip_ratio/low_min": 5.278716344037093e-06, "clip_ratio/region_mean": 0.0011071921144321095, "epoch": 0.052733201500329584, "grad_norm": 0.11520353704690933, "learning_rate": 2e-07, "loss": 0.0168, "step": 565 }, { "clip_ratio/high_max": 0.001998689662286779, "clip_ratio/high_mean": 0.0008338744646607665, "clip_ratio/low_mean": 0.0005769850076831062, "clip_ratio/low_min": 3.468655540928012e-05, "clip_ratio/region_mean": 0.0014108594914432615, "epoch": 0.05282653460033017, "grad_norm": 0.11920414865016937, "learning_rate": 2e-07, "loss": -0.0154, "step": 566 }, { "clip_ratio/high_max": 0.002038974289462203, "clip_ratio/high_mean": 0.0007023693287919741, "clip_ratio/low_mean": 0.0006350495123115252, "clip_ratio/low_min": 6.761611621186603e-05, "clip_ratio/region_mean": 0.0013374188638408668, "epoch": 0.05291986770033075, "grad_norm": 0.12366802245378494, "learning_rate": 2e-07, "loss": -0.0109, "step": 567 }, { "clip_ratio/high_max": 0.0017269053059862927, "clip_ratio/high_mean": 0.0007129967470973497, "clip_ratio/low_mean": 0.000608542128247791, "clip_ratio/low_min": 1.4095624464971479e-05, "clip_ratio/region_mean": 0.001321538889897056, "epoch": 0.05301320080033133, "grad_norm": 0.11472231894731522, "learning_rate": 2e-07, "loss": 0.0095, "step": 568 }, { "clip_ratio/high_max": 0.0016702533030183986, "clip_ratio/high_mean": 0.0006925598663656274, "clip_ratio/low_mean": 0.0006427852840715786, "clip_ratio/low_min": 5.117778346175328e-05, "clip_ratio/region_mean": 0.0013353451649891213, "epoch": 0.05310653390033192, "grad_norm": 0.14368966221809387, "learning_rate": 2e-07, "loss": 0.0202, "step": 569 }, { "clip_ratio/high_max": 0.0018115360544470605, "clip_ratio/high_mean": 0.0006607777377212187, "clip_ratio/low_mean": 0.0005787943264294881, "clip_ratio/low_min": 2.473641598044196e-05, "clip_ratio/region_mean": 0.0012395720623317175, "epoch": 0.053199867000332496, "grad_norm": 0.1197221577167511, "learning_rate": 2e-07, "loss": 0.0699, "step": 570 }, { "clip_ratio/high_max": 0.0020126455965510104, "clip_ratio/high_mean": 0.000775036467530299, "clip_ratio/low_mean": 0.0005871908851986518, "clip_ratio/low_min": 9.262003914045636e-06, "clip_ratio/region_mean": 0.0013622273909277283, "epoch": 0.05329320010033308, "grad_norm": 0.13638238608837128, "learning_rate": 2e-07, "loss": 0.0142, "step": 571 }, { "clip_ratio/high_max": 0.001846505590947345, "clip_ratio/high_mean": 0.0007671996118006064, "clip_ratio/low_mean": 0.0004907596371594991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001257959254871821, "epoch": 0.053386533200333666, "grad_norm": 0.13579873740673065, "learning_rate": 2e-07, "loss": -0.0004, "step": 572 }, { "clip_ratio/high_max": 0.001885782661702251, "clip_ratio/high_mean": 0.0007782633765600622, "clip_ratio/low_mean": 0.0005505429435288534, "clip_ratio/low_min": 3.887061848217854e-05, "clip_ratio/region_mean": 0.0013288063455547672, "epoch": 0.05347986630033425, "grad_norm": 0.22227318584918976, "learning_rate": 2e-07, "loss": 0.0468, "step": 573 }, { "clip_ratio/high_max": 0.0017666459643805865, "clip_ratio/high_mean": 0.0007799595659889746, "clip_ratio/low_mean": 0.0004781320076290285, "clip_ratio/low_min": 1.042709391185781e-05, "clip_ratio/region_mean": 0.0012580915936268866, "epoch": 0.05357319940033483, "grad_norm": 0.11867934465408325, "learning_rate": 2e-07, "loss": -0.0119, "step": 574 }, { "clip_ratio/high_max": 0.0019866141883539967, "clip_ratio/high_mean": 0.0008548714358767029, "clip_ratio/low_mean": 0.000600242141445051, "clip_ratio/low_min": 3.095135798503179e-05, "clip_ratio/region_mean": 0.001455113582778722, "epoch": 0.053666532500335415, "grad_norm": 0.1356223076581955, "learning_rate": 2e-07, "loss": -0.0001, "step": 575 }, { "clip_ratio/high_max": 0.00195651211834047, "clip_ratio/high_mean": 0.0007460907036147546, "clip_ratio/low_mean": 0.0006365838853525929, "clip_ratio/low_min": 4.306820119381882e-05, "clip_ratio/region_mean": 0.0013826745744154323, "epoch": 0.053759865600336, "grad_norm": 0.11269514262676239, "learning_rate": 2e-07, "loss": 0.0246, "step": 576 }, { "clip_ratio/high_max": 0.0016697847859177273, "clip_ratio/high_mean": 0.0007288893521035789, "clip_ratio/low_mean": 0.0005168548964320507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012457442680897657, "epoch": 0.053853198700336585, "grad_norm": 0.1243629902601242, "learning_rate": 2e-07, "loss": 0.0475, "step": 577 }, { "clip_ratio/high_max": 0.0017798299031710485, "clip_ratio/high_mean": 0.000710913129751134, "clip_ratio/low_mean": 0.000506110159221862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012170232585049234, "epoch": 0.053946531800337164, "grad_norm": 0.12458079308271408, "learning_rate": 2e-07, "loss": 0.0161, "step": 578 }, { "clip_ratio/high_max": 0.00162797112352564, "clip_ratio/high_mean": 0.0006646018855462899, "clip_ratio/low_mean": 0.0005998745705255715, "clip_ratio/low_min": 5.250122012512293e-05, "clip_ratio/region_mean": 0.0012644764319702517, "epoch": 0.05403986490033775, "grad_norm": 0.11783482134342194, "learning_rate": 2e-07, "loss": 0.0373, "step": 579 }, { "clip_ratio/high_max": 0.001879115923657082, "clip_ratio/high_mean": 0.0007565552077721804, "clip_ratio/low_mean": 0.0005385502927310881, "clip_ratio/low_min": 5.771285850642016e-05, "clip_ratio/region_mean": 0.0012951054886798374, "epoch": 0.054133198000338334, "grad_norm": 0.13198161125183105, "learning_rate": 2e-07, "loss": 0.0335, "step": 580 }, { "clip_ratio/high_max": 0.0021892019794904627, "clip_ratio/high_mean": 0.0009171991750918096, "clip_ratio/low_mean": 0.0006238217756617814, "clip_ratio/low_min": 3.529979449012899e-05, "clip_ratio/region_mean": 0.0015410209671244957, "epoch": 0.05422653110033892, "grad_norm": 0.13487371802330017, "learning_rate": 2e-07, "loss": -0.0103, "step": 581 }, { "clip_ratio/high_max": 0.0017096548253903165, "clip_ratio/high_mean": 0.0007594519738631789, "clip_ratio/low_mean": 0.0006225819361134199, "clip_ratio/low_min": 3.124610702798236e-05, "clip_ratio/region_mean": 0.0013820339372614399, "epoch": 0.0543198642003395, "grad_norm": 0.13045914471149445, "learning_rate": 2e-07, "loss": 0.0313, "step": 582 }, { "clip_ratio/high_max": 0.0014533554276567884, "clip_ratio/high_mean": 0.000575455122088897, "clip_ratio/low_mean": 0.0006742003333783941, "clip_ratio/low_min": 1.7319468042842345e-05, "clip_ratio/region_mean": 0.0012496554481913336, "epoch": 0.05441319730034008, "grad_norm": 0.12129105627536774, "learning_rate": 2e-07, "loss": 0.0665, "step": 583 }, { "clip_ratio/high_max": 0.0017846442024165299, "clip_ratio/high_mean": 0.0006911709606356453, "clip_ratio/low_mean": 0.0005766718677477911, "clip_ratio/low_min": 1.2530072126537561e-05, "clip_ratio/region_mean": 0.0012678428065555636, "epoch": 0.05450653040034067, "grad_norm": 0.1220710501074791, "learning_rate": 2e-07, "loss": 0.0306, "step": 584 }, { "clip_ratio/high_max": 0.00198998495034175, "clip_ratio/high_mean": 0.0006400764304999029, "clip_ratio/low_mean": 0.0006634789551753784, "clip_ratio/low_min": 6.018608110025525e-05, "clip_ratio/region_mean": 0.0013035553965892177, "epoch": 0.054599863500341246, "grad_norm": 0.12754133343696594, "learning_rate": 2e-07, "loss": 0.0773, "step": 585 }, { "clip_ratio/high_max": 0.0016538534982828423, "clip_ratio/high_mean": 0.0006879127586216782, "clip_ratio/low_mean": 0.0005469334355439059, "clip_ratio/low_min": 1.4794650269323029e-05, "clip_ratio/region_mean": 0.0012348462259978987, "epoch": 0.05469319660034183, "grad_norm": 0.12297696620225906, "learning_rate": 2e-07, "loss": 0.0469, "step": 586 }, { "clip_ratio/high_max": 0.0020761688247148413, "clip_ratio/high_mean": 0.0008023149021028075, "clip_ratio/low_mean": 0.000612718193224282, "clip_ratio/low_min": 2.556498657213524e-05, "clip_ratio/region_mean": 0.0014150330935081001, "epoch": 0.054786529700342416, "grad_norm": 0.13738387823104858, "learning_rate": 2e-07, "loss": 0.0554, "step": 587 }, { "clip_ratio/high_max": 0.0017503084018244408, "clip_ratio/high_mean": 0.0007874890934544965, "clip_ratio/low_mean": 0.0005550610330828931, "clip_ratio/low_min": 7.897397154010832e-06, "clip_ratio/region_mean": 0.001342550134722842, "epoch": 0.054879862800343, "grad_norm": 0.13236290216445923, "learning_rate": 2e-07, "loss": 0.0027, "step": 588 }, { "clip_ratio/high_max": 0.0015410939558933023, "clip_ratio/high_mean": 0.0006149955115688499, "clip_ratio/low_mean": 0.0006659924847554066, "clip_ratio/low_min": 2.988053620356368e-05, "clip_ratio/region_mean": 0.0012809879881388042, "epoch": 0.05497319590034358, "grad_norm": 0.12689785659313202, "learning_rate": 2e-07, "loss": 0.0316, "step": 589 }, { "clip_ratio/high_max": 0.001959101267857477, "clip_ratio/high_mean": 0.0007448444430337986, "clip_ratio/low_mean": 0.0006252952262002509, "clip_ratio/low_min": 1.6075102394097485e-05, "clip_ratio/region_mean": 0.0013701396565011237, "epoch": 0.055066529000344165, "grad_norm": 0.13592302799224854, "learning_rate": 2e-07, "loss": 0.0153, "step": 590 }, { "clip_ratio/high_max": 0.0020989665572415106, "clip_ratio/high_mean": 0.0008433330694970209, "clip_ratio/low_mean": 0.0006176371016408666, "clip_ratio/low_min": 6.281680452957517e-05, "clip_ratio/region_mean": 0.001460970175685361, "epoch": 0.05515986210034475, "grad_norm": 0.1321668177843094, "learning_rate": 2e-07, "loss": 0.0081, "step": 591 }, { "clip_ratio/high_max": 0.0016573472021264024, "clip_ratio/high_mean": 0.0006624462021136424, "clip_ratio/low_mean": 0.0005255823480183608, "clip_ratio/low_min": 2.5123720661213156e-05, "clip_ratio/region_mean": 0.0011880285710503813, "epoch": 0.055253195200345336, "grad_norm": 0.13149040937423706, "learning_rate": 2e-07, "loss": 0.0419, "step": 592 }, { "clip_ratio/high_max": 0.0017793898659874685, "clip_ratio/high_mean": 0.0007033273050183197, "clip_ratio/low_mean": 0.0006222679530765163, "clip_ratio/low_min": 2.470067011017818e-05, "clip_ratio/region_mean": 0.0013255952471808996, "epoch": 0.055346528300345914, "grad_norm": 0.14036960899829865, "learning_rate": 2e-07, "loss": 0.02, "step": 593 }, { "clip_ratio/high_max": 0.0020325880068412516, "clip_ratio/high_mean": 0.0007809740709490143, "clip_ratio/low_mean": 0.0006450348955695517, "clip_ratio/low_min": 2.497957757441327e-05, "clip_ratio/region_mean": 0.001426008966518566, "epoch": 0.0554398614003465, "grad_norm": 0.2676650881767273, "learning_rate": 2e-07, "loss": 0.0211, "step": 594 }, { "clip_ratio/high_max": 0.0018841320234059822, "clip_ratio/high_mean": 0.0006703415783704259, "clip_ratio/low_mean": 0.0005237162185949273, "clip_ratio/low_min": 5.951671209913911e-05, "clip_ratio/region_mean": 0.0011940578151552472, "epoch": 0.055533194500347084, "grad_norm": 0.14318114519119263, "learning_rate": 2e-07, "loss": 0.0364, "step": 595 }, { "clip_ratio/high_max": 0.001886978108814219, "clip_ratio/high_mean": 0.0007561022484878777, "clip_ratio/low_mean": 0.000568112930523057, "clip_ratio/low_min": 5.2477713325060904e-05, "clip_ratio/region_mean": 0.0013242152017483022, "epoch": 0.05562652760034766, "grad_norm": 0.12458319962024689, "learning_rate": 2e-07, "loss": 0.0009, "step": 596 }, { "clip_ratio/high_max": 0.0017968566571653355, "clip_ratio/high_mean": 0.0007810509523551445, "clip_ratio/low_mean": 0.0006321854143607197, "clip_ratio/low_min": 2.5635767087806016e-05, "clip_ratio/region_mean": 0.0014132363648968749, "epoch": 0.05571986070034825, "grad_norm": 0.13053551316261292, "learning_rate": 2e-07, "loss": 0.0165, "step": 597 }, { "clip_ratio/high_max": 0.0016252153509412892, "clip_ratio/high_mean": 0.0006437623396777781, "clip_ratio/low_mean": 0.0005544756704694009, "clip_ratio/low_min": 1.5311121387640014e-05, "clip_ratio/region_mean": 0.0011982380019617267, "epoch": 0.05581319380034883, "grad_norm": 0.1327916532754898, "learning_rate": 2e-07, "loss": 0.0263, "step": 598 }, { "clip_ratio/high_max": 0.0018848180334316567, "clip_ratio/high_mean": 0.000723632696463028, "clip_ratio/low_mean": 0.0005847063603141578, "clip_ratio/low_min": 8.272667400888167e-06, "clip_ratio/region_mean": 0.001308339062234154, "epoch": 0.05590652690034942, "grad_norm": 0.1310209333896637, "learning_rate": 2e-07, "loss": 0.041, "step": 599 }, { "clip_ratio/high_max": 0.001695720995485317, "clip_ratio/high_mean": 0.0006634865485466435, "clip_ratio/low_mean": 0.000634342611192551, "clip_ratio/low_min": 7.913289664429612e-05, "clip_ratio/region_mean": 0.001297829148825258, "epoch": 0.055999860000349996, "grad_norm": 0.12018702179193497, "learning_rate": 2e-07, "loss": 0.0418, "step": 600 }, { "clip_ratio/high_max": 0.001900447517982684, "clip_ratio/high_mean": 0.0007841793922125362, "clip_ratio/low_mean": 0.0006151568468339974, "clip_ratio/low_min": 4.4292556594882626e-05, "clip_ratio/region_mean": 0.0013993362008477561, "epoch": 0.05609319310035058, "grad_norm": 0.12365878373384476, "learning_rate": 2e-07, "loss": 0.0075, "step": 601 }, { "clip_ratio/high_max": 0.0019290830059617292, "clip_ratio/high_mean": 0.0007831463481124956, "clip_ratio/low_mean": 0.0007195020843937527, "clip_ratio/low_min": 5.172137980480329e-05, "clip_ratio/region_mean": 0.0015026484688860364, "epoch": 0.05618652620035117, "grad_norm": 0.14077097177505493, "learning_rate": 2e-07, "loss": 0.0376, "step": 602 }, { "clip_ratio/high_max": 0.0017942325648618862, "clip_ratio/high_mean": 0.0007575634645036189, "clip_ratio/low_mean": 0.0005991123907733709, "clip_ratio/low_min": 0.0001274592223126092, "clip_ratio/region_mean": 0.001356675897113746, "epoch": 0.05627985930035175, "grad_norm": 0.12890395522117615, "learning_rate": 2e-07, "loss": 0.0328, "step": 603 }, { "clip_ratio/high_max": 0.0018646691314643249, "clip_ratio/high_mean": 0.0007725319755991222, "clip_ratio/low_mean": 0.0007225594108604128, "clip_ratio/low_min": 4.432194145920221e-05, "clip_ratio/region_mean": 0.001495091404649429, "epoch": 0.05637319240035233, "grad_norm": 0.12693729996681213, "learning_rate": 2e-07, "loss": 0.0305, "step": 604 }, { "clip_ratio/high_max": 0.002063531155727105, "clip_ratio/high_mean": 0.0007943812506709946, "clip_ratio/low_mean": 0.0006294220856943866, "clip_ratio/low_min": 2.5925908630597405e-05, "clip_ratio/region_mean": 0.00142380334000336, "epoch": 0.056466525500352915, "grad_norm": 0.1365271508693695, "learning_rate": 2e-07, "loss": -0.0021, "step": 605 }, { "clip_ratio/high_max": 0.00203251661878312, "clip_ratio/high_mean": 0.0007879346158006229, "clip_ratio/low_mean": 0.0004991922824046924, "clip_ratio/low_min": 4.6887322241673246e-05, "clip_ratio/region_mean": 0.0012871269209426828, "epoch": 0.0565598586003535, "grad_norm": 0.1278432458639145, "learning_rate": 2e-07, "loss": -0.0027, "step": 606 }, { "clip_ratio/high_max": 0.0015390693515655585, "clip_ratio/high_mean": 0.0005903260589548154, "clip_ratio/low_mean": 0.0005913725435675588, "clip_ratio/low_min": 4.17025075876154e-05, "clip_ratio/region_mean": 0.0011816985679615755, "epoch": 0.056653191700354086, "grad_norm": 0.11410504579544067, "learning_rate": 2e-07, "loss": 0.047, "step": 607 }, { "clip_ratio/high_max": 0.0017402319754182827, "clip_ratio/high_mean": 0.0006966765085962834, "clip_ratio/low_mean": 0.0005274328523228178, "clip_ratio/low_min": 2.7827270059788134e-05, "clip_ratio/region_mean": 0.00122410936455708, "epoch": 0.056746524800354664, "grad_norm": 0.11616390943527222, "learning_rate": 2e-07, "loss": 0.0202, "step": 608 }, { "clip_ratio/high_max": 0.0017816999170463532, "clip_ratio/high_mean": 0.0006182619736136985, "clip_ratio/low_mean": 0.000609972744314291, "clip_ratio/low_min": 4.807841924048262e-05, "clip_ratio/region_mean": 0.0012282347270229366, "epoch": 0.05683985790035525, "grad_norm": 0.11660334467887878, "learning_rate": 2e-07, "loss": 0.0494, "step": 609 }, { "clip_ratio/high_max": 0.0017853262434073258, "clip_ratio/high_mean": 0.0007639398354513105, "clip_ratio/low_mean": 0.0007102946292434353, "clip_ratio/low_min": 6.503276290459326e-05, "clip_ratio/region_mean": 0.001474234501074534, "epoch": 0.056933191000355834, "grad_norm": 0.1354941427707672, "learning_rate": 2e-07, "loss": 0.049, "step": 610 }, { "clip_ratio/high_max": 0.0018278978750458919, "clip_ratio/high_mean": 0.0007430641799146542, "clip_ratio/low_mean": 0.0006183151544973953, "clip_ratio/low_min": 3.4503143979236484e-05, "clip_ratio/region_mean": 0.0013613793125841767, "epoch": 0.05702652410035641, "grad_norm": 0.12802274525165558, "learning_rate": 2e-07, "loss": 0.0351, "step": 611 }, { "clip_ratio/high_max": 0.0016272912689601071, "clip_ratio/high_mean": 0.0006205138251971221, "clip_ratio/low_mean": 0.0005882887417101301, "clip_ratio/low_min": 1.2939958651259076e-05, "clip_ratio/region_mean": 0.0012088025723642204, "epoch": 0.057119857200357, "grad_norm": 0.11734262853860855, "learning_rate": 2e-07, "loss": 0.0348, "step": 612 }, { "clip_ratio/high_max": 0.0021973373877699487, "clip_ratio/high_mean": 0.0007850227539165644, "clip_ratio/low_mean": 0.0005910487889195792, "clip_ratio/low_min": 2.1779372218588833e-05, "clip_ratio/region_mean": 0.0013760715373791754, "epoch": 0.05721319030035758, "grad_norm": 0.13653187453746796, "learning_rate": 2e-07, "loss": 0.0116, "step": 613 }, { "clip_ratio/high_max": 0.0020950166654074565, "clip_ratio/high_mean": 0.0008593393977207597, "clip_ratio/low_mean": 0.00056025269714155, "clip_ratio/low_min": 2.3286560463020578e-05, "clip_ratio/region_mean": 0.001419592103047762, "epoch": 0.05730652340035817, "grad_norm": 0.12896187603473663, "learning_rate": 2e-07, "loss": -0.0116, "step": 614 }, { "clip_ratio/high_max": 0.0016556828504690202, "clip_ratio/high_mean": 0.0006919518855283968, "clip_ratio/low_mean": 0.0006706585518259089, "clip_ratio/low_min": 1.2437811165000312e-05, "clip_ratio/region_mean": 0.0013626104082504753, "epoch": 0.05739985650035875, "grad_norm": 0.1294972002506256, "learning_rate": 2e-07, "loss": -0.0025, "step": 615 }, { "clip_ratio/high_max": 0.0018003187396971043, "clip_ratio/high_mean": 0.0007296469157154206, "clip_ratio/low_mean": 0.0006821004117227858, "clip_ratio/low_min": 4.4877334403281566e-05, "clip_ratio/region_mean": 0.001411747307429323, "epoch": 0.05749318960035933, "grad_norm": 0.13182489573955536, "learning_rate": 2e-07, "loss": 0.0563, "step": 616 }, { "clip_ratio/high_max": 0.002149923333490733, "clip_ratio/high_mean": 0.0007671562889299821, "clip_ratio/low_mean": 0.000634258379250241, "clip_ratio/low_min": 1.1388484381313901e-05, "clip_ratio/region_mean": 0.0014014146436238661, "epoch": 0.05758652270035992, "grad_norm": 0.12806399166584015, "learning_rate": 2e-07, "loss": 0.0765, "step": 617 }, { "clip_ratio/high_max": 0.0020226039923727512, "clip_ratio/high_mean": 0.0008018541338969953, "clip_ratio/low_mean": 0.0005924443748881458, "clip_ratio/low_min": 5.686851091013523e-05, "clip_ratio/region_mean": 0.0013942985133326147, "epoch": 0.0576798558003605, "grad_norm": 0.1382153481245041, "learning_rate": 2e-07, "loss": 0.0074, "step": 618 }, { "clip_ratio/high_max": 0.0019411146749916952, "clip_ratio/high_mean": 0.0007067885544529418, "clip_ratio/low_mean": 0.0006499896626337431, "clip_ratio/low_min": 7.562792507087579e-05, "clip_ratio/region_mean": 0.001356778204353759, "epoch": 0.05777318890036108, "grad_norm": 0.14665408432483673, "learning_rate": 2e-07, "loss": 0.0307, "step": 619 }, { "clip_ratio/high_max": 0.0017691556677164044, "clip_ratio/high_mean": 0.0007072527023410657, "clip_ratio/low_mean": 0.000624091499048518, "clip_ratio/low_min": 6.261970247578574e-05, "clip_ratio/region_mean": 0.0013313442104845308, "epoch": 0.057866522000361666, "grad_norm": 0.13146257400512695, "learning_rate": 2e-07, "loss": 0.0507, "step": 620 }, { "clip_ratio/high_max": 0.0016596964760537958, "clip_ratio/high_mean": 0.0006650236400673748, "clip_ratio/low_mean": 0.0006698264733131509, "clip_ratio/low_min": 5.13114532623149e-05, "clip_ratio/region_mean": 0.0013348501197469886, "epoch": 0.05795985510036225, "grad_norm": 0.13904964923858643, "learning_rate": 2e-07, "loss": 0.0179, "step": 621 }, { "clip_ratio/high_max": 0.002203146941610612, "clip_ratio/high_mean": 0.0008818303649604786, "clip_ratio/low_mean": 0.0005728210853703786, "clip_ratio/low_min": 1.4572161489923019e-05, "clip_ratio/region_mean": 0.0014546514248650055, "epoch": 0.05805318820036283, "grad_norm": 0.15285640954971313, "learning_rate": 2e-07, "loss": 0.0136, "step": 622 }, { "clip_ratio/high_max": 0.0018832642526831478, "clip_ratio/high_mean": 0.0007876799973018933, "clip_ratio/low_mean": 0.0006289450611802749, "clip_ratio/low_min": 5.1864752094843425e-05, "clip_ratio/region_mean": 0.0014166250875859987, "epoch": 0.058146521300363414, "grad_norm": 0.12940692901611328, "learning_rate": 2e-07, "loss": -0.0043, "step": 623 }, { "clip_ratio/high_max": 0.002043551048700465, "clip_ratio/high_mean": 0.0008772184410190675, "clip_ratio/low_mean": 0.0006694986923321267, "clip_ratio/low_min": 5.122583570482675e-05, "clip_ratio/region_mean": 0.0015467170960619114, "epoch": 0.058239854400364, "grad_norm": 0.1330651193857193, "learning_rate": 2e-07, "loss": -0.0152, "step": 624 }, { "clip_ratio/high_max": 0.0019444019526417833, "clip_ratio/high_mean": 0.000901491723197978, "clip_ratio/low_mean": 0.0006607248060390702, "clip_ratio/low_min": 8.575434003432747e-05, "clip_ratio/region_mean": 0.0015622165446984582, "epoch": 0.058333187500364585, "grad_norm": 0.13738784193992615, "learning_rate": 2e-07, "loss": 0.029, "step": 625 }, { "clip_ratio/high_max": 0.0019095624738838524, "clip_ratio/high_mean": 0.0007395011580229038, "clip_ratio/low_mean": 0.0005824506743010716, "clip_ratio/low_min": 3.587809260352515e-05, "clip_ratio/region_mean": 0.0013219518259575125, "epoch": 0.05842652060036516, "grad_norm": 0.14015622437000275, "learning_rate": 2e-07, "loss": 0.0295, "step": 626 }, { "clip_ratio/high_max": 0.0015879261518421117, "clip_ratio/high_mean": 0.0007069680050335592, "clip_ratio/low_mean": 0.0005264173141767969, "clip_ratio/low_min": 3.0555547709809616e-05, "clip_ratio/region_mean": 0.0012333853264863137, "epoch": 0.05851985370036575, "grad_norm": 0.11589527875185013, "learning_rate": 2e-07, "loss": 0.005, "step": 627 }, { "clip_ratio/high_max": 0.0018926439806818962, "clip_ratio/high_mean": 0.0007253129351738608, "clip_ratio/low_mean": 0.0005649846516462276, "clip_ratio/low_min": 1.7246136849280447e-05, "clip_ratio/region_mean": 0.0012902975795441307, "epoch": 0.05861318680036633, "grad_norm": 0.12498581409454346, "learning_rate": 2e-07, "loss": 0.0129, "step": 628 }, { "clip_ratio/high_max": 0.0018813434326148126, "clip_ratio/high_mean": 0.0007009445853327634, "clip_ratio/low_mean": 0.0006813356885686517, "clip_ratio/low_min": 2.2374528271029703e-05, "clip_ratio/region_mean": 0.0013822802793583833, "epoch": 0.05870651990036692, "grad_norm": 0.13449352979660034, "learning_rate": 2e-07, "loss": 0.0606, "step": 629 }, { "clip_ratio/high_max": 0.0019149374747939873, "clip_ratio/high_mean": 0.0007418008281092625, "clip_ratio/low_mean": 0.0007568519413325703, "clip_ratio/low_min": 6.313886842690408e-05, "clip_ratio/region_mean": 0.0014986527676228434, "epoch": 0.0587998530003675, "grad_norm": 0.12492045760154724, "learning_rate": 2e-07, "loss": 0.024, "step": 630 }, { "clip_ratio/high_max": 0.0020977575113647617, "clip_ratio/high_mean": 0.0007916204049251974, "clip_ratio/low_mean": 0.0007588077005493687, "clip_ratio/low_min": 0.00013757063788943924, "clip_ratio/region_mean": 0.001550428096379619, "epoch": 0.05889318610036808, "grad_norm": 0.135748952627182, "learning_rate": 2e-07, "loss": 0.0745, "step": 631 }, { "clip_ratio/high_max": 0.0018270688451593742, "clip_ratio/high_mean": 0.0007102314011717681, "clip_ratio/low_mean": 0.0006292733178270282, "clip_ratio/low_min": 4.093898769497173e-05, "clip_ratio/region_mean": 0.0013395047244557645, "epoch": 0.05898651920036867, "grad_norm": 0.13410218060016632, "learning_rate": 2e-07, "loss": 0.0397, "step": 632 }, { "clip_ratio/high_max": 0.002197666333813686, "clip_ratio/high_mean": 0.000817728256151895, "clip_ratio/low_mean": 0.0005857736214238685, "clip_ratio/low_min": 7.592322617711034e-06, "clip_ratio/region_mean": 0.0014035018903086893, "epoch": 0.05907985230036925, "grad_norm": 0.13063833117485046, "learning_rate": 2e-07, "loss": -0.0181, "step": 633 }, { "clip_ratio/high_max": 0.002088592984364368, "clip_ratio/high_mean": 0.0008108243673632387, "clip_ratio/low_mean": 0.0006597821193281561, "clip_ratio/low_min": 1.3757428860117216e-05, "clip_ratio/region_mean": 0.0014706064612255432, "epoch": 0.05917318540036983, "grad_norm": 0.12496549636125565, "learning_rate": 2e-07, "loss": 0.0244, "step": 634 }, { "clip_ratio/high_max": 0.002147826955479104, "clip_ratio/high_mean": 0.0008981159353425028, "clip_ratio/low_mean": 0.0007368638689513318, "clip_ratio/low_min": 0.00012901648096885765, "clip_ratio/region_mean": 0.0016349798170267604, "epoch": 0.059266518500370416, "grad_norm": 0.15332888066768646, "learning_rate": 2e-07, "loss": 0.0002, "step": 635 }, { "clip_ratio/high_max": 0.0017043730949808378, "clip_ratio/high_mean": 0.0006608036728721345, "clip_ratio/low_mean": 0.0005998108481435338, "clip_ratio/low_min": 2.775927168841008e-05, "clip_ratio/region_mean": 0.0012606145319296047, "epoch": 0.059359851600371, "grad_norm": 0.1403980404138565, "learning_rate": 2e-07, "loss": 0.0598, "step": 636 }, { "clip_ratio/high_max": 0.001959842113137711, "clip_ratio/high_mean": 0.000792916431237245, "clip_ratio/low_mean": 0.0005838958913955139, "clip_ratio/low_min": 6.341314929159125e-06, "clip_ratio/region_mean": 0.0013768123390036635, "epoch": 0.05945318470037158, "grad_norm": 0.13317140936851501, "learning_rate": 2e-07, "loss": 0.0459, "step": 637 }, { "clip_ratio/high_max": 0.0017596183279238176, "clip_ratio/high_mean": 0.0006951760715310229, "clip_ratio/low_mean": 0.0007069658277032431, "clip_ratio/low_min": 8.743433954805369e-05, "clip_ratio/region_mean": 0.0014021418974152766, "epoch": 0.059546517800372165, "grad_norm": 0.12893326580524445, "learning_rate": 2e-07, "loss": 0.0465, "step": 638 }, { "clip_ratio/high_max": 0.0020533813476504292, "clip_ratio/high_mean": 0.0007459484040737152, "clip_ratio/low_mean": 0.000673320573696401, "clip_ratio/low_min": 3.2872610972844996e-05, "clip_ratio/region_mean": 0.0014192689704941586, "epoch": 0.05963985090037275, "grad_norm": 0.14024290442466736, "learning_rate": 2e-07, "loss": 0.0161, "step": 639 }, { "clip_ratio/high_max": 0.0020172522781649604, "clip_ratio/high_mean": 0.000765625454732799, "clip_ratio/low_mean": 0.0005502803778654197, "clip_ratio/low_min": 3.56140335497912e-05, "clip_ratio/region_mean": 0.0013159058325982187, "epoch": 0.059733184000373335, "grad_norm": 0.12708166241645813, "learning_rate": 2e-07, "loss": 0.0182, "step": 640 }, { "clip_ratio/high_max": 0.02092636685119942, "clip_ratio/high_mean": 0.00911651787464507, "clip_ratio/low_mean": 0.0026331456101615913, "clip_ratio/low_min": 0.00020501024118857458, "clip_ratio/region_mean": 0.011749663448426872, "completions/clipped_ratio": 0.016339983258928603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 662.2559814453125, "completions/mean_terminated_length": 605.2166748046875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.05982651710037391, "grad_norm": 1365884.75, "learning_rate": 2e-07, "loss": 754.9741, "num_tokens": 503988990.0, "reward": 0.5851876735687256, "reward_std": 0.1897149533033371, "rewards/simpleverify_reward/mean": 0.5851876139640808, "rewards/simpleverify_reward/std": 0.4926918148994446, "step": 641 }, { "clip_ratio/high_max": 0.0017065773863578215, "clip_ratio/high_mean": 0.0006681604663754115, "clip_ratio/low_mean": 0.0006310067692538723, "clip_ratio/low_min": 6.089785983931506e-05, "clip_ratio/region_mean": 0.0012991672447242308, "epoch": 0.0599198502003745, "grad_norm": 0.12068770825862885, "learning_rate": 2e-07, "loss": 0.0108, "step": 642 }, { "clip_ratio/high_max": 0.001730965515889693, "clip_ratio/high_mean": 0.0006250574406294618, "clip_ratio/low_mean": 0.0005252438168099616, "clip_ratio/low_min": 2.9053457183181308e-05, "clip_ratio/region_mean": 0.0011503012428875081, "epoch": 0.060013183300375084, "grad_norm": 0.125808447599411, "learning_rate": 2e-07, "loss": 0.0274, "step": 643 }, { "clip_ratio/high_max": 0.0016850394531502388, "clip_ratio/high_mean": 0.000668474083795445, "clip_ratio/low_mean": 0.0005000874962206581, "clip_ratio/low_min": 2.4826216758810915e-05, "clip_ratio/region_mean": 0.0011685615972965024, "epoch": 0.06010651640037567, "grad_norm": 0.12118154019117355, "learning_rate": 2e-07, "loss": 0.0184, "step": 644 }, { "clip_ratio/high_max": 0.0016316668734361883, "clip_ratio/high_mean": 0.0006467286038969178, "clip_ratio/low_mean": 0.0006818644997110823, "clip_ratio/low_min": 3.6188573176332284e-05, "clip_ratio/region_mean": 0.0013285931381687988, "epoch": 0.06019984950037625, "grad_norm": 0.12603475153446198, "learning_rate": 2e-07, "loss": 0.0395, "step": 645 }, { "clip_ratio/high_max": 0.0018834678157872986, "clip_ratio/high_mean": 0.0006996198872002424, "clip_ratio/low_mean": 0.0005479102946992498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012475301919039339, "epoch": 0.06029318260037683, "grad_norm": 0.13647961616516113, "learning_rate": 2e-07, "loss": -0.003, "step": 646 }, { "clip_ratio/high_max": 0.0015827391944185365, "clip_ratio/high_mean": 0.0006524945520141046, "clip_ratio/low_mean": 0.0005707615746359807, "clip_ratio/low_min": 4.483800012167194e-05, "clip_ratio/region_mean": 0.001223256123921601, "epoch": 0.06038651570037742, "grad_norm": 0.10979334264993668, "learning_rate": 2e-07, "loss": 0.0373, "step": 647 }, { "clip_ratio/high_max": 0.0016456763150927145, "clip_ratio/high_mean": 0.000672917876727297, "clip_ratio/low_mean": 0.0005004310442018323, "clip_ratio/low_min": 1.699293170531746e-05, "clip_ratio/region_mean": 0.0011733489191101398, "epoch": 0.060479848800377996, "grad_norm": 0.12378764152526855, "learning_rate": 2e-07, "loss": 0.0409, "step": 648 }, { "clip_ratio/high_max": 0.0017563787478138693, "clip_ratio/high_mean": 0.0006613577461394016, "clip_ratio/low_mean": 0.0005320533109625103, "clip_ratio/low_min": 3.86052270187065e-05, "clip_ratio/region_mean": 0.0011934110407310072, "epoch": 0.06057318190037858, "grad_norm": 0.12648127973079681, "learning_rate": 2e-07, "loss": 0.0262, "step": 649 }, { "clip_ratio/high_max": 0.0014960420594434254, "clip_ratio/high_mean": 0.0005957888351986185, "clip_ratio/low_mean": 0.0005056591426182422, "clip_ratio/low_min": 1.1140819879074115e-05, "clip_ratio/region_mean": 0.0011014479714503977, "epoch": 0.060666515000379166, "grad_norm": 0.1355755478143692, "learning_rate": 2e-07, "loss": 0.0558, "step": 650 }, { "clip_ratio/high_max": 0.001664666098804446, "clip_ratio/high_mean": 0.0006345188267005142, "clip_ratio/low_mean": 0.0005502725643964368, "clip_ratio/low_min": 5.613666053250199e-05, "clip_ratio/region_mean": 0.0011847913883684669, "epoch": 0.06075984810037975, "grad_norm": 0.12586739659309387, "learning_rate": 2e-07, "loss": 0.0562, "step": 651 }, { "clip_ratio/high_max": 0.0016897981622605585, "clip_ratio/high_mean": 0.0007013797530817101, "clip_ratio/low_mean": 0.0005595267984972452, "clip_ratio/low_min": 2.9481141609721817e-05, "clip_ratio/region_mean": 0.0012609065634023864, "epoch": 0.06085318120038033, "grad_norm": 0.1216101348400116, "learning_rate": 2e-07, "loss": -0.0016, "step": 652 }, { "clip_ratio/high_max": 0.001654162851991714, "clip_ratio/high_mean": 0.0006733025825269578, "clip_ratio/low_mean": 0.0005089970859444293, "clip_ratio/low_min": 4.363052357803099e-05, "clip_ratio/region_mean": 0.0011822996602859348, "epoch": 0.060946514300380915, "grad_norm": 0.12671025097370148, "learning_rate": 2e-07, "loss": 0.0067, "step": 653 }, { "clip_ratio/high_max": 0.0017454118642490357, "clip_ratio/high_mean": 0.0006835994390712585, "clip_ratio/low_mean": 0.0005551656822717632, "clip_ratio/low_min": 7.006811210885644e-05, "clip_ratio/region_mean": 0.001238765129528474, "epoch": 0.0610398474003815, "grad_norm": 0.12396043539047241, "learning_rate": 2e-07, "loss": 0.0216, "step": 654 }, { "clip_ratio/high_max": 0.0016575774898228701, "clip_ratio/high_mean": 0.0006655197103100363, "clip_ratio/low_mean": 0.0005959520467513357, "clip_ratio/low_min": 3.256022864661645e-05, "clip_ratio/region_mean": 0.0012614717707037926, "epoch": 0.061133180500382085, "grad_norm": 0.12199550122022629, "learning_rate": 2e-07, "loss": 0.0425, "step": 655 }, { "clip_ratio/high_max": 0.0017090075853047892, "clip_ratio/high_mean": 0.0007437635576934554, "clip_ratio/low_mean": 0.0005538780487768236, "clip_ratio/low_min": 2.8759850465576164e-05, "clip_ratio/region_mean": 0.001297641640121583, "epoch": 0.061226513600382663, "grad_norm": 0.11475273966789246, "learning_rate": 2e-07, "loss": 0.0198, "step": 656 }, { "clip_ratio/high_max": 0.0017478534391557332, "clip_ratio/high_mean": 0.0007449047625414096, "clip_ratio/low_mean": 0.0005980523692414863, "clip_ratio/low_min": 2.178457680201973e-05, "clip_ratio/region_mean": 0.0013429571117740124, "epoch": 0.06131984670038325, "grad_norm": 0.13247263431549072, "learning_rate": 2e-07, "loss": 0.0265, "step": 657 }, { "clip_ratio/high_max": 0.0017484819836681709, "clip_ratio/high_mean": 0.0007402138317047502, "clip_ratio/low_mean": 0.0006166663679323392, "clip_ratio/low_min": 5.5167659411381464e-05, "clip_ratio/region_mean": 0.0013568802060035523, "epoch": 0.061413179800383834, "grad_norm": 0.13908669352531433, "learning_rate": 2e-07, "loss": 0.024, "step": 658 }, { "clip_ratio/high_max": 0.0016893646388780326, "clip_ratio/high_mean": 0.0006303880654741079, "clip_ratio/low_mean": 0.0005771830274170497, "clip_ratio/low_min": 5.259279078018153e-05, "clip_ratio/region_mean": 0.001207571112900041, "epoch": 0.06150651290038442, "grad_norm": 0.11770395934581757, "learning_rate": 2e-07, "loss": 0.0121, "step": 659 }, { "clip_ratio/high_max": 0.0014463627194345463, "clip_ratio/high_mean": 0.0006558750883414177, "clip_ratio/low_mean": 0.00056148011390178, "clip_ratio/low_min": 4.235309825162403e-05, "clip_ratio/region_mean": 0.0012173551876912825, "epoch": 0.061599846000385, "grad_norm": 0.13160111010074615, "learning_rate": 2e-07, "loss": -0.006, "step": 660 }, { "clip_ratio/high_max": 0.0018279202849953435, "clip_ratio/high_mean": 0.0006532464030897245, "clip_ratio/low_mean": 0.0005032659555581631, "clip_ratio/low_min": 1.4927155461919028e-05, "clip_ratio/region_mean": 0.0011565123531909194, "epoch": 0.06169317910038558, "grad_norm": 0.13015127182006836, "learning_rate": 2e-07, "loss": 0.0182, "step": 661 }, { "clip_ratio/high_max": 0.0016299131784762722, "clip_ratio/high_mean": 0.0006979461595619796, "clip_ratio/low_mean": 0.0006059737606847193, "clip_ratio/low_min": 2.8777673378499458e-05, "clip_ratio/region_mean": 0.0013039199329796247, "epoch": 0.06178651220038617, "grad_norm": 0.13408970832824707, "learning_rate": 2e-07, "loss": 0.0214, "step": 662 }, { "clip_ratio/high_max": 0.0014583126212528441, "clip_ratio/high_mean": 0.0005836872824147576, "clip_ratio/low_mean": 0.0005877462754142471, "clip_ratio/low_min": 4.274965976946987e-05, "clip_ratio/region_mean": 0.0011714335705619305, "epoch": 0.061879845300386746, "grad_norm": 0.12468250840902328, "learning_rate": 2e-07, "loss": 0.0419, "step": 663 }, { "clip_ratio/high_max": 0.001667812051891815, "clip_ratio/high_mean": 0.0006553807925229194, "clip_ratio/low_mean": 0.000523083528605639, "clip_ratio/low_min": 2.0741557818837464e-05, "clip_ratio/region_mean": 0.0011784643247665372, "epoch": 0.06197317840038733, "grad_norm": 0.12835747003555298, "learning_rate": 2e-07, "loss": 0.0224, "step": 664 }, { "clip_ratio/high_max": 0.0015729515907878522, "clip_ratio/high_mean": 0.0006172027096909005, "clip_ratio/low_mean": 0.0005488729864282504, "clip_ratio/low_min": 6.383155687217368e-05, "clip_ratio/region_mean": 0.0011660757008939981, "epoch": 0.062066511500387916, "grad_norm": 0.12002723664045334, "learning_rate": 2e-07, "loss": -0.0239, "step": 665 }, { "clip_ratio/high_max": 0.0021446828759508207, "clip_ratio/high_mean": 0.0007378095051535638, "clip_ratio/low_mean": 0.0006026764294801978, "clip_ratio/low_min": 6.826732806075597e-05, "clip_ratio/region_mean": 0.0013404859419097193, "epoch": 0.0621598446003885, "grad_norm": 0.1323888599872589, "learning_rate": 2e-07, "loss": 0.0659, "step": 666 }, { "clip_ratio/high_max": 0.0017468734258727636, "clip_ratio/high_mean": 0.00072161966272688, "clip_ratio/low_mean": 0.000522592412380618, "clip_ratio/low_min": 4.996178722649347e-05, "clip_ratio/region_mean": 0.0012442120678315405, "epoch": 0.06225317770038908, "grad_norm": 0.16321566700935364, "learning_rate": 2e-07, "loss": -0.0122, "step": 667 }, { "clip_ratio/high_max": 0.0018500303012842778, "clip_ratio/high_mean": 0.0007944196950120386, "clip_ratio/low_mean": 0.0005760900894529186, "clip_ratio/low_min": 7.073568031046307e-05, "clip_ratio/region_mean": 0.0013705097990168724, "epoch": 0.062346510800389665, "grad_norm": 0.7605500221252441, "learning_rate": 2e-07, "loss": 0.0373, "step": 668 }, { "clip_ratio/high_max": 0.0016664655340719037, "clip_ratio/high_mean": 0.0006709124663757393, "clip_ratio/low_mean": 0.0005034664127379074, "clip_ratio/low_min": 5.518204488907941e-05, "clip_ratio/region_mean": 0.0011743788891180884, "epoch": 0.06243984390039025, "grad_norm": 0.12057474255561829, "learning_rate": 2e-07, "loss": 0.0196, "step": 669 }, { "clip_ratio/high_max": 0.0017185471770062577, "clip_ratio/high_mean": 0.0006968451434659073, "clip_ratio/low_mean": 0.0005092984847578919, "clip_ratio/low_min": 1.0111632036569063e-05, "clip_ratio/region_mean": 0.001206143591844011, "epoch": 0.06253317700039084, "grad_norm": 0.1370396912097931, "learning_rate": 2e-07, "loss": 0.0033, "step": 670 }, { "clip_ratio/high_max": 0.0020873833491350524, "clip_ratio/high_mean": 0.0008032144323806278, "clip_ratio/low_mean": 0.0005407195385487285, "clip_ratio/low_min": 4.871517649007728e-05, "clip_ratio/region_mean": 0.0013439339491014834, "epoch": 0.06262651010039141, "grad_norm": 0.12908096611499786, "learning_rate": 2e-07, "loss": -0.0074, "step": 671 }, { "clip_ratio/high_max": 0.0018211700589745305, "clip_ratio/high_mean": 0.0006363991524267476, "clip_ratio/low_mean": 0.0005653623211401282, "clip_ratio/low_min": 3.167420618410688e-05, "clip_ratio/region_mean": 0.001201761497213738, "epoch": 0.062719843200392, "grad_norm": 0.12195020914077759, "learning_rate": 2e-07, "loss": 0.0639, "step": 672 }, { "clip_ratio/high_max": 0.0016658248496241868, "clip_ratio/high_mean": 0.0005900564055991708, "clip_ratio/low_mean": 0.0005537913511943771, "clip_ratio/low_min": 2.931248218374094e-05, "clip_ratio/region_mean": 0.0011438477467891062, "epoch": 0.06281317630039258, "grad_norm": 0.11666692793369293, "learning_rate": 2e-07, "loss": 0.06, "step": 673 }, { "clip_ratio/high_max": 0.0018279250944033265, "clip_ratio/high_mean": 0.0007008269276411738, "clip_ratio/low_mean": 0.000625688227955834, "clip_ratio/low_min": 3.0054607577767456e-05, "clip_ratio/region_mean": 0.0013265151865198277, "epoch": 0.06290650940039316, "grad_norm": 0.1199314072728157, "learning_rate": 2e-07, "loss": -0.0018, "step": 674 }, { "clip_ratio/high_max": 0.0017797584223444574, "clip_ratio/high_mean": 0.0007540653004980413, "clip_ratio/low_mean": 0.0006237299348867964, "clip_ratio/low_min": 8.825190889183432e-06, "clip_ratio/region_mean": 0.0013777952262898907, "epoch": 0.06299984250039375, "grad_norm": 0.13348956406116486, "learning_rate": 2e-07, "loss": 0.0296, "step": 675 }, { "clip_ratio/high_max": 0.0017820864013629034, "clip_ratio/high_mean": 0.0006629771332882228, "clip_ratio/low_mean": 0.0006046263324606116, "clip_ratio/low_min": 8.266373879450839e-05, "clip_ratio/region_mean": 0.001267603463929845, "epoch": 0.06309317560039433, "grad_norm": 0.132685124874115, "learning_rate": 2e-07, "loss": 0.0548, "step": 676 }, { "clip_ratio/high_max": 0.0014455714917858131, "clip_ratio/high_mean": 0.0006452912239183206, "clip_ratio/low_mean": 0.00047958471623132937, "clip_ratio/low_min": 1.3478542314260267e-05, "clip_ratio/region_mean": 0.0011248759328736924, "epoch": 0.06318650870039491, "grad_norm": 0.12158458679914474, "learning_rate": 2e-07, "loss": 0.0445, "step": 677 }, { "clip_ratio/high_max": 0.0019001661603397224, "clip_ratio/high_mean": 0.0008478759682475356, "clip_ratio/low_mean": 0.0006295389466686174, "clip_ratio/low_min": 4.7775852181075606e-05, "clip_ratio/region_mean": 0.001477414905821206, "epoch": 0.0632798418003955, "grad_norm": 0.23240794241428375, "learning_rate": 2e-07, "loss": 0.004, "step": 678 }, { "clip_ratio/high_max": 0.0016498203549417667, "clip_ratio/high_mean": 0.0006674472469967441, "clip_ratio/low_mean": 0.0005517329627764411, "clip_ratio/low_min": 2.261870986330905e-05, "clip_ratio/region_mean": 0.0012191802125016693, "epoch": 0.06337317490039608, "grad_norm": 0.2108023464679718, "learning_rate": 2e-07, "loss": 0.0185, "step": 679 }, { "clip_ratio/high_max": 0.0017059472775144968, "clip_ratio/high_mean": 0.000640398689029098, "clip_ratio/low_mean": 0.0005386439320318459, "clip_ratio/low_min": 7.18225692253327e-06, "clip_ratio/region_mean": 0.0011790426124207443, "epoch": 0.06346650800039666, "grad_norm": 0.1245657354593277, "learning_rate": 2e-07, "loss": 0.0511, "step": 680 }, { "clip_ratio/high_max": 0.0017104010839830153, "clip_ratio/high_mean": 0.0006297570889728377, "clip_ratio/low_mean": 0.0006337284339679172, "clip_ratio/low_min": 5.7636620113044046e-05, "clip_ratio/region_mean": 0.0012634855374926701, "epoch": 0.06355984110039725, "grad_norm": 0.14375777542591095, "learning_rate": 2e-07, "loss": 0.0539, "step": 681 }, { "clip_ratio/high_max": 0.0017551425698911771, "clip_ratio/high_mean": 0.0007165225979406387, "clip_ratio/low_mean": 0.0005489909472089494, "clip_ratio/low_min": 2.4218548787757754e-05, "clip_ratio/region_mean": 0.0012655135687964503, "epoch": 0.06365317420039783, "grad_norm": 0.13683193922042847, "learning_rate": 2e-07, "loss": 0.0323, "step": 682 }, { "clip_ratio/high_max": 0.001880586234619841, "clip_ratio/high_mean": 0.0008188173505914165, "clip_ratio/low_mean": 0.0006308059873845195, "clip_ratio/low_min": 1.7472742911195382e-05, "clip_ratio/region_mean": 0.0014496233598038089, "epoch": 0.06374650730039842, "grad_norm": 0.1362380087375641, "learning_rate": 2e-07, "loss": 0.0069, "step": 683 }, { "clip_ratio/high_max": 0.0015462803930859081, "clip_ratio/high_mean": 0.000618541651419946, "clip_ratio/low_mean": 0.0004311111606511986, "clip_ratio/low_min": 1.6496749594807625e-05, "clip_ratio/region_mean": 0.0010496528157091234, "epoch": 0.063839840400399, "grad_norm": 0.1300773173570633, "learning_rate": 2e-07, "loss": -0.001, "step": 684 }, { "clip_ratio/high_max": 0.0015001613028289285, "clip_ratio/high_mean": 0.0005908993643970462, "clip_ratio/low_mean": 0.0006145246552478056, "clip_ratio/low_min": 4.9877981837198604e-05, "clip_ratio/region_mean": 0.0012054240323777776, "epoch": 0.06393317350039958, "grad_norm": 0.1277017742395401, "learning_rate": 2e-07, "loss": 0.067, "step": 685 }, { "clip_ratio/high_max": 0.0015584733391733607, "clip_ratio/high_mean": 0.0006159597569421749, "clip_ratio/low_mean": 0.0005733204889111221, "clip_ratio/low_min": 2.2012056433595717e-05, "clip_ratio/region_mean": 0.0011892802431248128, "epoch": 0.06402650660040017, "grad_norm": 0.12499699741601944, "learning_rate": 2e-07, "loss": 0.0366, "step": 686 }, { "clip_ratio/high_max": 0.0014135846504359506, "clip_ratio/high_mean": 0.0006369060192810139, "clip_ratio/low_mean": 0.0006140184996183962, "clip_ratio/low_min": 4.3716231630241964e-05, "clip_ratio/region_mean": 0.0012509245425462723, "epoch": 0.06411983970040075, "grad_norm": 0.12887609004974365, "learning_rate": 2e-07, "loss": 0.0461, "step": 687 }, { "clip_ratio/high_max": 0.0015638781624147668, "clip_ratio/high_mean": 0.0006727978343405994, "clip_ratio/low_mean": 0.0006178403964440804, "clip_ratio/low_min": 7.245242341014091e-05, "clip_ratio/region_mean": 0.0012906382289656904, "epoch": 0.06421317280040133, "grad_norm": 0.1270231157541275, "learning_rate": 2e-07, "loss": 0.0253, "step": 688 }, { "clip_ratio/high_max": 0.0020093826460652053, "clip_ratio/high_mean": 0.0008021069625101518, "clip_ratio/low_mean": 0.0004748574183395249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012769644090440124, "epoch": 0.06430650590040192, "grad_norm": 0.12813836336135864, "learning_rate": 2e-07, "loss": -0.0216, "step": 689 }, { "clip_ratio/high_max": 0.001896862388093723, "clip_ratio/high_mean": 0.0008131685135595035, "clip_ratio/low_mean": 0.0006969935402594274, "clip_ratio/low_min": 1.859549229266122e-05, "clip_ratio/region_mean": 0.0015101620447239839, "epoch": 0.0643998390004025, "grad_norm": 0.1347668319940567, "learning_rate": 2e-07, "loss": 0.0181, "step": 690 }, { "clip_ratio/high_max": 0.0017879898769024294, "clip_ratio/high_mean": 0.0007061422984406818, "clip_ratio/low_mean": 0.000609958908171393, "clip_ratio/low_min": 1.1524985893629491e-05, "clip_ratio/region_mean": 0.0013161012102500536, "epoch": 0.06449317210040308, "grad_norm": 0.12205696851015091, "learning_rate": 2e-07, "loss": 0.0285, "step": 691 }, { "clip_ratio/high_max": 0.001683655777014792, "clip_ratio/high_mean": 0.0006340133713820251, "clip_ratio/low_mean": 0.0006758243880540249, "clip_ratio/low_min": 3.171433763782261e-05, "clip_ratio/region_mean": 0.0013098377785354387, "epoch": 0.06458650520040367, "grad_norm": 0.12861458957195282, "learning_rate": 2e-07, "loss": 0.0646, "step": 692 }, { "clip_ratio/high_max": 0.0014577286237908993, "clip_ratio/high_mean": 0.0005960850976407528, "clip_ratio/low_mean": 0.0005952550645815791, "clip_ratio/low_min": 7.587713753309799e-06, "clip_ratio/region_mean": 0.0011913401795027312, "epoch": 0.06467983830040425, "grad_norm": 0.13748180866241455, "learning_rate": 2e-07, "loss": 0.0428, "step": 693 }, { "clip_ratio/high_max": 0.00193388285151741, "clip_ratio/high_mean": 0.0007328521787712816, "clip_ratio/low_mean": 0.0005002969328415929, "clip_ratio/low_min": 3.787717196246376e-05, "clip_ratio/region_mean": 0.0012331491270742845, "epoch": 0.06477317140040484, "grad_norm": 0.13457660377025604, "learning_rate": 2e-07, "loss": -0.0163, "step": 694 }, { "clip_ratio/high_max": 0.0017190680810017511, "clip_ratio/high_mean": 0.000632153736660257, "clip_ratio/low_mean": 0.0005849529879924376, "clip_ratio/low_min": 1.9975170289399102e-05, "clip_ratio/region_mean": 0.0012171067064628005, "epoch": 0.06486650450040542, "grad_norm": 0.12993580102920532, "learning_rate": 2e-07, "loss": -0.0064, "step": 695 }, { "clip_ratio/high_max": 0.0017197706110891886, "clip_ratio/high_mean": 0.0007023792641120963, "clip_ratio/low_mean": 0.0005865202474524267, "clip_ratio/low_min": 2.6220986001135316e-05, "clip_ratio/region_mean": 0.0012888994860986713, "epoch": 0.064959837600406, "grad_norm": 0.12869040668010712, "learning_rate": 2e-07, "loss": 0.0276, "step": 696 }, { "clip_ratio/high_max": 0.001507964789198013, "clip_ratio/high_mean": 0.0006283692819124553, "clip_ratio/low_mean": 0.0005688575326985301, "clip_ratio/low_min": 2.0697924355772557e-05, "clip_ratio/region_mean": 0.0011972268330282532, "epoch": 0.06505317070040659, "grad_norm": 0.11785142868757248, "learning_rate": 2e-07, "loss": 0.0408, "step": 697 }, { "clip_ratio/high_max": 0.0016889129983610474, "clip_ratio/high_mean": 0.0007024408359939116, "clip_ratio/low_mean": 0.0005552773691306356, "clip_ratio/low_min": 5.5123628953879233e-05, "clip_ratio/region_mean": 0.0012577181914821267, "epoch": 0.06514650380040717, "grad_norm": 0.12636211514472961, "learning_rate": 2e-07, "loss": 0.0415, "step": 698 }, { "clip_ratio/high_max": 0.0014897844994266052, "clip_ratio/high_mean": 0.0005816352086185361, "clip_ratio/low_mean": 0.0005174009265829227, "clip_ratio/low_min": 8.71930842549773e-06, "clip_ratio/region_mean": 0.0010990361461153952, "epoch": 0.06523983690040774, "grad_norm": 0.49638545513153076, "learning_rate": 2e-07, "loss": 0.0151, "step": 699 }, { "clip_ratio/high_max": 0.0017284897548961453, "clip_ratio/high_mean": 0.0007320513868762646, "clip_ratio/low_mean": 0.000541347915714141, "clip_ratio/low_min": 1.617494854144752e-05, "clip_ratio/region_mean": 0.0012733993062283844, "epoch": 0.06533317000040834, "grad_norm": 0.12663838267326355, "learning_rate": 2e-07, "loss": -0.0085, "step": 700 }, { "clip_ratio/high_max": 0.0016152882526512258, "clip_ratio/high_mean": 0.0007114342261047568, "clip_ratio/low_mean": 0.0005025990249123424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012140332291892264, "epoch": 0.06542650310040891, "grad_norm": 0.12812025845050812, "learning_rate": 2e-07, "loss": -0.0069, "step": 701 }, { "clip_ratio/high_max": 0.0017815071441873442, "clip_ratio/high_mean": 0.0007084357184794499, "clip_ratio/low_mean": 0.0004825739351872471, "clip_ratio/low_min": 4.561720834317384e-05, "clip_ratio/region_mean": 0.001191009665490128, "epoch": 0.06551983620040949, "grad_norm": 0.12003355473279953, "learning_rate": 2e-07, "loss": -0.0175, "step": 702 }, { "clip_ratio/high_max": 0.0018161366242566146, "clip_ratio/high_mean": 0.0007496271882700967, "clip_ratio/low_mean": 0.0006222048523341073, "clip_ratio/low_min": 2.633242002048064e-05, "clip_ratio/region_mean": 0.0013718320115003735, "epoch": 0.06561316930041008, "grad_norm": 0.13450071215629578, "learning_rate": 2e-07, "loss": -0.0127, "step": 703 }, { "clip_ratio/high_max": 0.0017458758593420498, "clip_ratio/high_mean": 0.0007060937950882362, "clip_ratio/low_mean": 0.0006095738663134398, "clip_ratio/low_min": 3.218835217921878e-05, "clip_ratio/region_mean": 0.0013156676359358244, "epoch": 0.06570650240041066, "grad_norm": 0.12530018389225006, "learning_rate": 2e-07, "loss": 0.04, "step": 704 }, { "clip_ratio/high_max": 0.0017650343725108542, "clip_ratio/high_mean": 0.0007810108945704997, "clip_ratio/low_mean": 0.0006335267644317355, "clip_ratio/low_min": 5.295045866660075e-05, "clip_ratio/region_mean": 0.0014145376953820232, "epoch": 0.06579983550041126, "grad_norm": 0.13335835933685303, "learning_rate": 2e-07, "loss": -0.0044, "step": 705 }, { "clip_ratio/high_max": 0.0015114101370272692, "clip_ratio/high_mean": 0.0005972737362753833, "clip_ratio/low_mean": 0.0006492882639577147, "clip_ratio/low_min": 0.00011737395561794983, "clip_ratio/region_mean": 0.0012465620238799602, "epoch": 0.06589316860041183, "grad_norm": 0.13296709954738617, "learning_rate": 2e-07, "loss": 0.058, "step": 706 }, { "clip_ratio/high_max": 0.0016603402837063186, "clip_ratio/high_mean": 0.0006937317521078512, "clip_ratio/low_mean": 0.0006222367301234044, "clip_ratio/low_min": 3.281970475654816e-05, "clip_ratio/region_mean": 0.0013159684713173192, "epoch": 0.06598650170041241, "grad_norm": 0.14746566116809845, "learning_rate": 2e-07, "loss": 0.0298, "step": 707 }, { "clip_ratio/high_max": 0.001845924802182708, "clip_ratio/high_mean": 0.0006878824387968052, "clip_ratio/low_mean": 0.0004872417857768596, "clip_ratio/low_min": 3.313130946480669e-05, "clip_ratio/region_mean": 0.0011751242418540642, "epoch": 0.066079834800413, "grad_norm": 0.11855975538492203, "learning_rate": 2e-07, "loss": -0.0128, "step": 708 }, { "clip_ratio/high_max": 0.001873504548711935, "clip_ratio/high_mean": 0.0006899400505062658, "clip_ratio/low_mean": 0.000627187599093304, "clip_ratio/low_min": 6.876307270431425e-05, "clip_ratio/region_mean": 0.0013171276696084533, "epoch": 0.06617316790041358, "grad_norm": 0.1277121752500534, "learning_rate": 2e-07, "loss": 0.0491, "step": 709 }, { "clip_ratio/high_max": 0.0016110573924379423, "clip_ratio/high_mean": 0.0006285007930273423, "clip_ratio/low_mean": 0.0005979634715913562, "clip_ratio/low_min": 4.5567341658170335e-05, "clip_ratio/region_mean": 0.0012264642609807197, "epoch": 0.06626650100041416, "grad_norm": 0.13598451018333435, "learning_rate": 2e-07, "loss": 0.0458, "step": 710 }, { "clip_ratio/high_max": 0.0013013172792852856, "clip_ratio/high_mean": 0.0005796274335807539, "clip_ratio/low_mean": 0.0005775121721853793, "clip_ratio/low_min": 2.177660553570604e-05, "clip_ratio/region_mean": 0.001157139635324711, "epoch": 0.06635983410041475, "grad_norm": 0.12078677117824554, "learning_rate": 2e-07, "loss": 0.0655, "step": 711 }, { "clip_ratio/high_max": 0.0018404439324513078, "clip_ratio/high_mean": 0.0007353419869104982, "clip_ratio/low_mean": 0.0005537161050597206, "clip_ratio/low_min": 9.481189408688806e-06, "clip_ratio/region_mean": 0.0012890580655948725, "epoch": 0.06645316720041533, "grad_norm": 0.23469290137290955, "learning_rate": 2e-07, "loss": 0.0517, "step": 712 }, { "clip_ratio/high_max": 0.0019216766449972056, "clip_ratio/high_mean": 0.0007578448276035488, "clip_ratio/low_mean": 0.000612449114669289, "clip_ratio/low_min": 5.650384355249116e-05, "clip_ratio/region_mean": 0.0013702939395443536, "epoch": 0.06654650030041591, "grad_norm": 0.15581178665161133, "learning_rate": 2e-07, "loss": 0.0482, "step": 713 }, { "clip_ratio/high_max": 0.0018119904343620874, "clip_ratio/high_mean": 0.0008236179146479117, "clip_ratio/low_mean": 0.0005594272793132404, "clip_ratio/low_min": 2.0195907836750848e-05, "clip_ratio/region_mean": 0.0013830451789544895, "epoch": 0.0666398334004165, "grad_norm": 0.13393065333366394, "learning_rate": 2e-07, "loss": 0.0061, "step": 714 }, { "clip_ratio/high_max": 0.0019005545946129132, "clip_ratio/high_mean": 0.000769074416894, "clip_ratio/low_mean": 0.0004924691852465912, "clip_ratio/low_min": 2.4428376491414383e-05, "clip_ratio/region_mean": 0.0012615436098712962, "epoch": 0.06673316650041708, "grad_norm": 0.13768470287322998, "learning_rate": 2e-07, "loss": 0.0413, "step": 715 }, { "clip_ratio/high_max": 0.001969527846085839, "clip_ratio/high_mean": 0.0007563246090285247, "clip_ratio/low_mean": 0.0004865717237407807, "clip_ratio/low_min": 1.941896516655106e-05, "clip_ratio/region_mean": 0.0012428963200363796, "epoch": 0.06682649960041767, "grad_norm": 0.12603184580802917, "learning_rate": 2e-07, "loss": -0.0039, "step": 716 }, { "clip_ratio/high_max": 0.001646428612730233, "clip_ratio/high_mean": 0.0006939051108929561, "clip_ratio/low_mean": 0.0006199538838700391, "clip_ratio/low_min": 5.3864967412664555e-05, "clip_ratio/region_mean": 0.0013138589820300695, "epoch": 0.06691983270041825, "grad_norm": 0.1386903077363968, "learning_rate": 2e-07, "loss": 0.0503, "step": 717 }, { "clip_ratio/high_max": 0.0017478908612247324, "clip_ratio/high_mean": 0.0007568989344690635, "clip_ratio/low_mean": 0.0005725580067519331, "clip_ratio/low_min": 8.82643689692486e-06, "clip_ratio/region_mean": 0.001329456914390903, "epoch": 0.06701316580041883, "grad_norm": 0.13218489289283752, "learning_rate": 2e-07, "loss": 0.0129, "step": 718 }, { "clip_ratio/high_max": 0.0016431936201115604, "clip_ratio/high_mean": 0.0007291721158253495, "clip_ratio/low_mean": 0.000700981112458976, "clip_ratio/low_min": 2.0619502265617484e-05, "clip_ratio/region_mean": 0.0014301532282843255, "epoch": 0.06710649890041942, "grad_norm": 0.1383633017539978, "learning_rate": 2e-07, "loss": 0.0333, "step": 719 }, { "clip_ratio/high_max": 0.0016926482785493135, "clip_ratio/high_mean": 0.0006537069566547871, "clip_ratio/low_mean": 0.0005763051558460575, "clip_ratio/low_min": 3.150811744490056e-05, "clip_ratio/region_mean": 0.0012300121379666962, "epoch": 0.06719983200042, "grad_norm": 0.1416458934545517, "learning_rate": 2e-07, "loss": 0.0253, "step": 720 }, { "clip_ratio/high_max": 0.0016349463185179047, "clip_ratio/high_mean": 0.0006912409116921481, "clip_ratio/low_mean": 0.0006255785419853055, "clip_ratio/low_min": 2.5650052975834114e-05, "clip_ratio/region_mean": 0.001316819430940086, "epoch": 0.06729316510042058, "grad_norm": 0.1343315839767456, "learning_rate": 2e-07, "loss": 0.041, "step": 721 }, { "clip_ratio/high_max": 0.001746447385812644, "clip_ratio/high_mean": 0.0007376395542451064, "clip_ratio/low_mean": 0.0006119174759078305, "clip_ratio/low_min": 2.7068214876635466e-05, "clip_ratio/region_mean": 0.0013495570783561561, "epoch": 0.06738649820042117, "grad_norm": 2.9352288246154785, "learning_rate": 2e-07, "loss": 0.0137, "step": 722 }, { "clip_ratio/high_max": 0.0019403832702664658, "clip_ratio/high_mean": 0.0007071226418702281, "clip_ratio/low_mean": 0.0006266819455049699, "clip_ratio/low_min": 4.775853449245915e-05, "clip_ratio/region_mean": 0.0013338045646378305, "epoch": 0.06747983130042175, "grad_norm": 0.14023274183273315, "learning_rate": 2e-07, "loss": 0.0682, "step": 723 }, { "clip_ratio/high_max": 0.0016476722776133101, "clip_ratio/high_mean": 0.0005884115435037529, "clip_ratio/low_mean": 0.0006121876922406955, "clip_ratio/low_min": 3.521393546179752e-05, "clip_ratio/region_mean": 0.0012005992502963636, "epoch": 0.06757316440042234, "grad_norm": 0.14161323010921478, "learning_rate": 2e-07, "loss": 0.0554, "step": 724 }, { "clip_ratio/high_max": 0.0018050209037028253, "clip_ratio/high_mean": 0.0007605813470945577, "clip_ratio/low_mean": 0.0006543144563693204, "clip_ratio/low_min": 6.010193828842603e-05, "clip_ratio/region_mean": 0.0014148957889119629, "epoch": 0.06766649750042292, "grad_norm": 0.12920735776424408, "learning_rate": 2e-07, "loss": 0.0284, "step": 725 }, { "clip_ratio/high_max": 0.0019291339049232192, "clip_ratio/high_mean": 0.0007724874012637883, "clip_ratio/low_mean": 0.0007200568106782157, "clip_ratio/low_min": 2.381763533776393e-05, "clip_ratio/region_mean": 0.0014925442155799828, "epoch": 0.0677598306004235, "grad_norm": 0.14308056235313416, "learning_rate": 2e-07, "loss": 0.0175, "step": 726 }, { "clip_ratio/high_max": 0.001391034875268815, "clip_ratio/high_mean": 0.0006132711023383308, "clip_ratio/low_mean": 0.0005933129032200668, "clip_ratio/low_min": 3.510344413371058e-05, "clip_ratio/region_mean": 0.0012065840237482917, "epoch": 0.06785316370042409, "grad_norm": 0.13062018156051636, "learning_rate": 2e-07, "loss": 0.0274, "step": 727 }, { "clip_ratio/high_max": 0.0018009515624726191, "clip_ratio/high_mean": 0.0006586129838979105, "clip_ratio/low_mean": 0.0005433078795249457, "clip_ratio/low_min": 1.1495309991005342e-05, "clip_ratio/region_mean": 0.0012019208952551708, "epoch": 0.06794649680042467, "grad_norm": 0.1303359419107437, "learning_rate": 2e-07, "loss": 0.0192, "step": 728 }, { "clip_ratio/high_max": 0.001653441442613257, "clip_ratio/high_mean": 0.0006880047203594586, "clip_ratio/low_mean": 0.0005780893825431122, "clip_ratio/low_min": 1.7994701465795515e-05, "clip_ratio/region_mean": 0.0012660941101785284, "epoch": 0.06803982990042524, "grad_norm": 0.1346747875213623, "learning_rate": 2e-07, "loss": 0.0268, "step": 729 }, { "clip_ratio/high_max": 0.0015159739559749141, "clip_ratio/high_mean": 0.000616999542216945, "clip_ratio/low_mean": 0.0006180608252179809, "clip_ratio/low_min": 2.564760325185489e-05, "clip_ratio/region_mean": 0.0012350603719823994, "epoch": 0.06813316300042584, "grad_norm": 0.13803623616695404, "learning_rate": 2e-07, "loss": 0.0537, "step": 730 }, { "clip_ratio/high_max": 0.0019053148243983742, "clip_ratio/high_mean": 0.0007460387405444635, "clip_ratio/low_mean": 0.0006647058489761548, "clip_ratio/low_min": 2.9260299925226718e-05, "clip_ratio/region_mean": 0.0014107445676927455, "epoch": 0.06822649610042641, "grad_norm": 0.1356225609779358, "learning_rate": 2e-07, "loss": 0.0147, "step": 731 }, { "clip_ratio/high_max": 0.0021733314206358045, "clip_ratio/high_mean": 0.0008002834438229911, "clip_ratio/low_mean": 0.0005339936342352303, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013342770726012532, "epoch": 0.06831982920042699, "grad_norm": 0.1248110979795456, "learning_rate": 2e-07, "loss": 0.0104, "step": 732 }, { "clip_ratio/high_max": 0.002110526907927124, "clip_ratio/high_mean": 0.000795112533523934, "clip_ratio/low_mean": 0.0006017726063873852, "clip_ratio/low_min": 5.0137175549025415e-05, "clip_ratio/region_mean": 0.0013968851199024357, "epoch": 0.06841316230042759, "grad_norm": 0.1425507813692093, "learning_rate": 2e-07, "loss": 0.0267, "step": 733 }, { "clip_ratio/high_max": 0.0015512536410824396, "clip_ratio/high_mean": 0.0007362874002865283, "clip_ratio/low_mean": 0.0005592849620370544, "clip_ratio/low_min": 2.2278916731011122e-05, "clip_ratio/region_mean": 0.0012955723323102575, "epoch": 0.06850649540042816, "grad_norm": 0.1322236955165863, "learning_rate": 2e-07, "loss": -0.0164, "step": 734 }, { "clip_ratio/high_max": 0.001973512833501445, "clip_ratio/high_mean": 0.0008434585961367702, "clip_ratio/low_mean": 0.0005515567390830256, "clip_ratio/low_min": 1.741917549225036e-05, "clip_ratio/region_mean": 0.0013950153079349548, "epoch": 0.06859982850042876, "grad_norm": 0.13598982989788055, "learning_rate": 2e-07, "loss": -0.0372, "step": 735 }, { "clip_ratio/high_max": 0.0013755441686953418, "clip_ratio/high_mean": 0.0006037279854353983, "clip_ratio/low_mean": 0.000685150738718221, "clip_ratio/low_min": 8.081283158389851e-05, "clip_ratio/region_mean": 0.0012888787205156405, "epoch": 0.06869316160042933, "grad_norm": 0.1304323375225067, "learning_rate": 2e-07, "loss": 0.0555, "step": 736 }, { "clip_ratio/high_max": 0.0016776171778474236, "clip_ratio/high_mean": 0.0006565636558661936, "clip_ratio/low_mean": 0.0006106670543886139, "clip_ratio/low_min": 6.424415823857998e-05, "clip_ratio/region_mean": 0.0012672307129832916, "epoch": 0.06878649470042991, "grad_norm": 0.14045096933841705, "learning_rate": 2e-07, "loss": 0.0346, "step": 737 }, { "clip_ratio/high_max": 0.0017824553833634127, "clip_ratio/high_mean": 0.0007497723127016798, "clip_ratio/low_mean": 0.0006868544642202323, "clip_ratio/low_min": 9.35509324335726e-05, "clip_ratio/region_mean": 0.0014366267641889863, "epoch": 0.0688798278004305, "grad_norm": 0.1476656198501587, "learning_rate": 2e-07, "loss": 0.0276, "step": 738 }, { "clip_ratio/high_max": 0.002014551526372088, "clip_ratio/high_mean": 0.0007785520538163837, "clip_ratio/low_mean": 0.0005678825891664019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001346434644801775, "epoch": 0.06897316090043108, "grad_norm": 0.14279600977897644, "learning_rate": 2e-07, "loss": 0.0142, "step": 739 }, { "clip_ratio/high_max": 0.0016068922923295759, "clip_ratio/high_mean": 0.0006672596909993445, "clip_ratio/low_mean": 0.0005846841195307206, "clip_ratio/low_min": 3.3891572456923313e-05, "clip_ratio/region_mean": 0.001251943816896528, "epoch": 0.06906649400043166, "grad_norm": 0.1329497992992401, "learning_rate": 2e-07, "loss": -0.0003, "step": 740 }, { "clip_ratio/high_max": 0.0015477959932468366, "clip_ratio/high_mean": 0.0006884727881697472, "clip_ratio/low_mean": 0.0006761211188859306, "clip_ratio/low_min": 9.686158227850683e-05, "clip_ratio/region_mean": 0.0013645939288835507, "epoch": 0.06915982710043225, "grad_norm": 0.15340709686279297, "learning_rate": 2e-07, "loss": 0.0343, "step": 741 }, { "clip_ratio/high_max": 0.0017472034596721642, "clip_ratio/high_mean": 0.0005598792449745815, "clip_ratio/low_mean": 0.0006305084971245378, "clip_ratio/low_min": 2.0184221284580417e-05, "clip_ratio/region_mean": 0.0011903877566510346, "epoch": 0.06925316020043283, "grad_norm": 0.13781733810901642, "learning_rate": 2e-07, "loss": 0.1057, "step": 742 }, { "clip_ratio/high_max": 0.0018353771847614553, "clip_ratio/high_mean": 0.0007695300155319273, "clip_ratio/low_mean": 0.0006667081979685463, "clip_ratio/low_min": 1.2948000403412152e-05, "clip_ratio/region_mean": 0.0014362382571562193, "epoch": 0.06934649330043341, "grad_norm": 0.14211416244506836, "learning_rate": 2e-07, "loss": 0.0463, "step": 743 }, { "clip_ratio/high_max": 0.001881918633444002, "clip_ratio/high_mean": 0.0007042689558147686, "clip_ratio/low_mean": 0.0006600547994821682, "clip_ratio/low_min": 7.30643650967977e-05, "clip_ratio/region_mean": 0.0013643237507494632, "epoch": 0.069439826400434, "grad_norm": 0.17888714373111725, "learning_rate": 2e-07, "loss": 0.042, "step": 744 }, { "clip_ratio/high_max": 0.0019346333065186627, "clip_ratio/high_mean": 0.0007357829872489674, "clip_ratio/low_mean": 0.0006854591501905816, "clip_ratio/low_min": 4.964719664712902e-05, "clip_ratio/region_mean": 0.001421242137439549, "epoch": 0.06953315950043458, "grad_norm": 0.1444707065820694, "learning_rate": 2e-07, "loss": 0.0527, "step": 745 }, { "clip_ratio/high_max": 0.0020131122364546172, "clip_ratio/high_mean": 0.000841406850668136, "clip_ratio/low_mean": 0.0006184665116961696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014598733541788533, "epoch": 0.06962649260043517, "grad_norm": 0.12694089114665985, "learning_rate": 2e-07, "loss": 0.0101, "step": 746 }, { "clip_ratio/high_max": 0.002002824559895089, "clip_ratio/high_mean": 0.0007816623128746869, "clip_ratio/low_mean": 0.0005684689558620448, "clip_ratio/low_min": 2.1011274839111138e-05, "clip_ratio/region_mean": 0.0013501312641892582, "epoch": 0.06971982570043575, "grad_norm": 0.13734646141529083, "learning_rate": 2e-07, "loss": 0.0347, "step": 747 }, { "clip_ratio/high_max": 0.0019175229899701662, "clip_ratio/high_mean": 0.0008016608317120699, "clip_ratio/low_mean": 0.0005673322029906558, "clip_ratio/low_min": 5.7647043831821065e-05, "clip_ratio/region_mean": 0.001368993031064747, "epoch": 0.06981315880043633, "grad_norm": 0.1341647356748581, "learning_rate": 2e-07, "loss": -0.0121, "step": 748 }, { "clip_ratio/high_max": 0.0019190673956472892, "clip_ratio/high_mean": 0.0006641351255893824, "clip_ratio/low_mean": 0.0006390485723386519, "clip_ratio/low_min": 0.0001085847761714831, "clip_ratio/region_mean": 0.0013031837152084336, "epoch": 0.06990649190043692, "grad_norm": 0.16204993426799774, "learning_rate": 2e-07, "loss": 0.0677, "step": 749 }, { "clip_ratio/high_max": 0.0018352304796280805, "clip_ratio/high_mean": 0.0007655756344320253, "clip_ratio/low_mean": 0.0005637933927573613, "clip_ratio/low_min": 1.9287762370368e-05, "clip_ratio/region_mean": 0.0013293690353748389, "epoch": 0.0699998250004375, "grad_norm": 0.18594947457313538, "learning_rate": 2e-07, "loss": 0.007, "step": 750 }, { "clip_ratio/high_max": 0.0021123119950061664, "clip_ratio/high_mean": 0.0007741561348666437, "clip_ratio/low_mean": 0.0007044008216325892, "clip_ratio/low_min": 1.152073764387751e-05, "clip_ratio/region_mean": 0.0014785569364903495, "epoch": 0.07009315810043808, "grad_norm": 0.13006354868412018, "learning_rate": 2e-07, "loss": 0.0321, "step": 751 }, { "clip_ratio/high_max": 0.0016299027010973077, "clip_ratio/high_mean": 0.0006894369453220861, "clip_ratio/low_mean": 0.0007583912774862256, "clip_ratio/low_min": 7.340219508478185e-05, "clip_ratio/region_mean": 0.0014478282209893223, "epoch": 0.07018649120043867, "grad_norm": 0.13340875506401062, "learning_rate": 2e-07, "loss": 0.0718, "step": 752 }, { "clip_ratio/high_max": 0.001568788105942076, "clip_ratio/high_mean": 0.0006704993847961305, "clip_ratio/low_mean": 0.0006226609420991736, "clip_ratio/low_min": 2.049232989520533e-05, "clip_ratio/region_mean": 0.0012931603196193464, "epoch": 0.07027982430043925, "grad_norm": 0.13387636840343475, "learning_rate": 2e-07, "loss": 0.0221, "step": 753 }, { "clip_ratio/high_max": 0.0017044080814230256, "clip_ratio/high_mean": 0.0007106898701749742, "clip_ratio/low_mean": 0.0006341700745906564, "clip_ratio/low_min": 4.087597244506469e-05, "clip_ratio/region_mean": 0.0013448599747789558, "epoch": 0.07037315740043983, "grad_norm": 0.13436150550842285, "learning_rate": 2e-07, "loss": 0.032, "step": 754 }, { "clip_ratio/high_max": 0.0017210624610015657, "clip_ratio/high_mean": 0.0006696337695757393, "clip_ratio/low_mean": 0.0007029233656794531, "clip_ratio/low_min": 6.71645193506265e-05, "clip_ratio/region_mean": 0.001372557133436203, "epoch": 0.07046649050044042, "grad_norm": 0.13905030488967896, "learning_rate": 2e-07, "loss": 0.0192, "step": 755 }, { "clip_ratio/high_max": 0.0018812806592904963, "clip_ratio/high_mean": 0.0007288844662980409, "clip_ratio/low_mean": 0.0006820002581662266, "clip_ratio/low_min": 3.229341291444143e-05, "clip_ratio/region_mean": 0.0014108846917224582, "epoch": 0.070559823600441, "grad_norm": 0.1712380200624466, "learning_rate": 2e-07, "loss": 0.0515, "step": 756 }, { "clip_ratio/high_max": 0.0019677303098433185, "clip_ratio/high_mean": 0.0008468619143968681, "clip_ratio/low_mean": 0.0007824374160918524, "clip_ratio/low_min": 1.0798203220474534e-05, "clip_ratio/region_mean": 0.0016292993386741728, "epoch": 0.07065315670044159, "grad_norm": 0.14097215235233307, "learning_rate": 2e-07, "loss": 0.0403, "step": 757 }, { "clip_ratio/high_max": 0.001998222178372089, "clip_ratio/high_mean": 0.0007868848406360485, "clip_ratio/low_mean": 0.0005887957786399056, "clip_ratio/low_min": 3.083038791373838e-05, "clip_ratio/region_mean": 0.0013756806292803958, "epoch": 0.07074648980044217, "grad_norm": 0.13767503201961517, "learning_rate": 2e-07, "loss": 0.0105, "step": 758 }, { "clip_ratio/high_max": 0.0017852562014013529, "clip_ratio/high_mean": 0.0006847150671092095, "clip_ratio/low_mean": 0.0007243162854138063, "clip_ratio/low_min": 1.058245834428817e-05, "clip_ratio/region_mean": 0.0014090313998167403, "epoch": 0.07083982290044274, "grad_norm": 0.13729645311832428, "learning_rate": 2e-07, "loss": 0.0393, "step": 759 }, { "clip_ratio/high_max": 0.0017310409639321733, "clip_ratio/high_mean": 0.0007158482267186628, "clip_ratio/low_mean": 0.0007941653584566666, "clip_ratio/low_min": 0.00011437252760515548, "clip_ratio/region_mean": 0.0015100135642569512, "epoch": 0.07093315600044334, "grad_norm": 0.17227871716022491, "learning_rate": 2e-07, "loss": 0.0624, "step": 760 }, { "clip_ratio/high_max": 0.0020095400941499975, "clip_ratio/high_mean": 0.0008180303320841631, "clip_ratio/low_mean": 0.0006249801717785886, "clip_ratio/low_min": 9.709491678222548e-06, "clip_ratio/region_mean": 0.0014430104820348788, "epoch": 0.07102648910044392, "grad_norm": 0.13231320679187775, "learning_rate": 2e-07, "loss": 0.0189, "step": 761 }, { "clip_ratio/high_max": 0.002000831071200082, "clip_ratio/high_mean": 0.0007673213367525022, "clip_ratio/low_mean": 0.0006424330531444866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014097543462412432, "epoch": 0.0711198222004445, "grad_norm": 0.12706294655799866, "learning_rate": 2e-07, "loss": 0.0207, "step": 762 }, { "clip_ratio/high_max": 0.002017152917687781, "clip_ratio/high_mean": 0.0007408308883896098, "clip_ratio/low_mean": 0.0006969236601435114, "clip_ratio/low_min": 3.259520508436253e-05, "clip_ratio/region_mean": 0.0014377545085153542, "epoch": 0.07121315530044509, "grad_norm": 0.1454247534275055, "learning_rate": 2e-07, "loss": 0.0456, "step": 763 }, { "clip_ratio/high_max": 0.00197572779507027, "clip_ratio/high_mean": 0.0007024952356005087, "clip_ratio/low_mean": 0.0006915584126545582, "clip_ratio/low_min": 8.96700112207327e-06, "clip_ratio/region_mean": 0.0013940536446170881, "epoch": 0.07130648840044566, "grad_norm": 0.26601114869117737, "learning_rate": 2e-07, "loss": 0.023, "step": 764 }, { "clip_ratio/high_max": 0.0017416523187421262, "clip_ratio/high_mean": 0.0007268990266311448, "clip_ratio/low_mean": 0.0006505915353045566, "clip_ratio/low_min": 6.869563367217779e-05, "clip_ratio/region_mean": 0.001377490541926818, "epoch": 0.07139982150044624, "grad_norm": 0.15248876810073853, "learning_rate": 2e-07, "loss": 0.0485, "step": 765 }, { "clip_ratio/high_max": 0.0017295719881076366, "clip_ratio/high_mean": 0.0007227960995805915, "clip_ratio/low_mean": 0.0005879171567357844, "clip_ratio/low_min": 4.8788484491524287e-05, "clip_ratio/region_mean": 0.001310713265411323, "epoch": 0.07149315460044683, "grad_norm": 0.13423849642276764, "learning_rate": 2e-07, "loss": 0.0284, "step": 766 }, { "clip_ratio/high_max": 0.001580485695740208, "clip_ratio/high_mean": 0.0007052026430756086, "clip_ratio/low_mean": 0.0006351879883368383, "clip_ratio/low_min": 1.1011275091732386e-05, "clip_ratio/region_mean": 0.0013403906195890158, "epoch": 0.07158648770044741, "grad_norm": 0.13993693888187408, "learning_rate": 2e-07, "loss": 0.0155, "step": 767 }, { "clip_ratio/high_max": 0.0016208321358135436, "clip_ratio/high_mean": 0.0006385864789990592, "clip_ratio/low_mean": 0.0005598166471827426, "clip_ratio/low_min": 3.3498910852358676e-05, "clip_ratio/region_mean": 0.0011984031079919077, "epoch": 0.071679820800448, "grad_norm": 0.12588836252689362, "learning_rate": 2e-07, "loss": -0.0034, "step": 768 }, { "clip_ratio/high_max": 0.0016640617031953298, "clip_ratio/high_mean": 0.0006651537350990111, "clip_ratio/low_mean": 0.0005679716614395147, "clip_ratio/low_min": 2.9024866307736374e-05, "clip_ratio/region_mean": 0.0012331254038144834, "completions/clipped_ratio": 0.0189296177455357, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 669.2849731445312, "completions/mean_terminated_length": 603.1669311523438, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.07177315390044858, "grad_norm": 0.1200176328420639, "learning_rate": 2e-07, "loss": 0.0144, "num_tokens": 592274102.0, "reward": 0.587533175945282, "reward_std": 0.18658646941184998, "rewards/simpleverify_reward/mean": 0.5875331163406372, "rewards/simpleverify_reward/std": 0.49228036403656006, "step": 769 }, { "clip_ratio/high_max": 0.001763664997270098, "clip_ratio/high_mean": 0.0007165318029365153, "clip_ratio/low_mean": 0.0005440120407911309, "clip_ratio/low_min": 2.7551244784262963e-05, "clip_ratio/region_mean": 0.0012605438860191498, "epoch": 0.07186648700044916, "grad_norm": 0.11535928398370743, "learning_rate": 2e-07, "loss": 0.005, "step": 770 }, { "clip_ratio/high_max": 0.0018081527596223168, "clip_ratio/high_mean": 0.000701039403793402, "clip_ratio/low_mean": 0.0006027344516041921, "clip_ratio/low_min": 4.069938586326316e-05, "clip_ratio/region_mean": 0.0013037738572165836, "epoch": 0.07195982010044975, "grad_norm": 0.12335245311260223, "learning_rate": 2e-07, "loss": 0.0147, "step": 771 }, { "clip_ratio/high_max": 0.0017272335280722473, "clip_ratio/high_mean": 0.0007435985626216279, "clip_ratio/low_mean": 0.0006387972207448911, "clip_ratio/low_min": 5.4254132010100875e-05, "clip_ratio/region_mean": 0.001382395756081678, "epoch": 0.07205315320045033, "grad_norm": 0.1265745460987091, "learning_rate": 2e-07, "loss": 0.0006, "step": 772 }, { "clip_ratio/high_max": 0.0016171671450138092, "clip_ratio/high_mean": 0.0006421653197321575, "clip_ratio/low_mean": 0.0006703893322992371, "clip_ratio/low_min": 5.29526369064115e-06, "clip_ratio/region_mean": 0.0013125546574883629, "epoch": 0.07214648630045091, "grad_norm": 0.13422124087810516, "learning_rate": 2e-07, "loss": 0.065, "step": 773 }, { "clip_ratio/high_max": 0.0017161291470983997, "clip_ratio/high_mean": 0.0006915295398357557, "clip_ratio/low_mean": 0.0005328520865077735, "clip_ratio/low_min": 3.22438149851223e-05, "clip_ratio/region_mean": 0.0012243816418049391, "epoch": 0.0722398194004515, "grad_norm": 0.11311989277601242, "learning_rate": 2e-07, "loss": 0.0057, "step": 774 }, { "clip_ratio/high_max": 0.0017800446366891265, "clip_ratio/high_mean": 0.0007017651259957347, "clip_ratio/low_mean": 0.0005871846533409553, "clip_ratio/low_min": 1.553567017253954e-05, "clip_ratio/region_mean": 0.0012889497775177006, "epoch": 0.07233315250045208, "grad_norm": 0.12425410747528076, "learning_rate": 2e-07, "loss": 0.003, "step": 775 }, { "clip_ratio/high_max": 0.0019921678758691996, "clip_ratio/high_mean": 0.000781892782470095, "clip_ratio/low_mean": 0.0005476402147905901, "clip_ratio/low_min": 2.1736652342951857e-05, "clip_ratio/region_mean": 0.001329533006355632, "epoch": 0.07242648560045266, "grad_norm": 0.1242922991514206, "learning_rate": 2e-07, "loss": 0.0344, "step": 776 }, { "clip_ratio/high_max": 0.0019104493585473392, "clip_ratio/high_mean": 0.0007157452164392453, "clip_ratio/low_mean": 0.000570622438317514, "clip_ratio/low_min": 3.897562783095054e-05, "clip_ratio/region_mean": 0.0012863676456618123, "epoch": 0.07251981870045325, "grad_norm": 0.1277121901512146, "learning_rate": 2e-07, "loss": 0.0198, "step": 777 }, { "clip_ratio/high_max": 0.001965167684829794, "clip_ratio/high_mean": 0.0007945667421154212, "clip_ratio/low_mean": 0.000632874977782194, "clip_ratio/low_min": 6.881707122374792e-05, "clip_ratio/region_mean": 0.0014274417262640782, "epoch": 0.07261315180045383, "grad_norm": 0.15136922895908356, "learning_rate": 2e-07, "loss": 0.0025, "step": 778 }, { "clip_ratio/high_max": 0.001287887331272941, "clip_ratio/high_mean": 0.0005273313017823966, "clip_ratio/low_mean": 0.0006005700561217964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011279013415332884, "epoch": 0.07270648490045442, "grad_norm": 0.13333232700824738, "learning_rate": 2e-07, "loss": 0.0026, "step": 779 }, { "clip_ratio/high_max": 0.0015332795992435422, "clip_ratio/high_mean": 0.0007034673071757425, "clip_ratio/low_mean": 0.0005831103248965519, "clip_ratio/low_min": 9.016199510369916e-05, "clip_ratio/region_mean": 0.0012865776297985576, "epoch": 0.072799818000455, "grad_norm": 0.1275341957807541, "learning_rate": 2e-07, "loss": -0.0002, "step": 780 }, { "clip_ratio/high_max": 0.0018385609946562909, "clip_ratio/high_mean": 0.0007549962556367973, "clip_ratio/low_mean": 0.00048272770436597057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012377239618217573, "epoch": 0.07289315110045558, "grad_norm": 0.14282125234603882, "learning_rate": 2e-07, "loss": 0.0097, "step": 781 }, { "clip_ratio/high_max": 0.0016916198947001249, "clip_ratio/high_mean": 0.0006690671652904712, "clip_ratio/low_mean": 0.0004747746179418755, "clip_ratio/low_min": 2.428127481834963e-05, "clip_ratio/region_mean": 0.001143841800512746, "epoch": 0.07298648420045617, "grad_norm": 0.14204253256320953, "learning_rate": 2e-07, "loss": 0.0424, "step": 782 }, { "clip_ratio/high_max": 0.001592929838807322, "clip_ratio/high_mean": 0.000590239208577259, "clip_ratio/low_mean": 0.0006100029895605985, "clip_ratio/low_min": 5.1313657422724646e-05, "clip_ratio/region_mean": 0.0012002422008663416, "epoch": 0.07307981730045675, "grad_norm": 0.12809215486049652, "learning_rate": 2e-07, "loss": 0.0622, "step": 783 }, { "clip_ratio/high_max": 0.0015120770040084608, "clip_ratio/high_mean": 0.0006210517603904009, "clip_ratio/low_mean": 0.0005937460355198709, "clip_ratio/low_min": 1.650528247409966e-05, "clip_ratio/region_mean": 0.0012147977722634096, "epoch": 0.07317315040045733, "grad_norm": 0.14840084314346313, "learning_rate": 2e-07, "loss": 0.0408, "step": 784 }, { "clip_ratio/high_max": 0.001665672487433767, "clip_ratio/high_mean": 0.0006532960924232611, "clip_ratio/low_mean": 0.0005215191958996002, "clip_ratio/low_min": 1.5711413652752526e-05, "clip_ratio/region_mean": 0.0011748152755899355, "epoch": 0.07326648350045792, "grad_norm": 0.12252987176179886, "learning_rate": 2e-07, "loss": 0.0083, "step": 785 }, { "clip_ratio/high_max": 0.002016483079387399, "clip_ratio/high_mean": 0.0007688053024139663, "clip_ratio/low_mean": 0.0006342743154164054, "clip_ratio/low_min": 2.9696710043936037e-05, "clip_ratio/region_mean": 0.0014030796301085502, "epoch": 0.0733598166004585, "grad_norm": 0.12238584458827972, "learning_rate": 2e-07, "loss": 0.0244, "step": 786 }, { "clip_ratio/high_max": 0.0013633124290208798, "clip_ratio/high_mean": 0.000596500642132014, "clip_ratio/low_mean": 0.0005269209823381971, "clip_ratio/low_min": 2.2610221094510052e-05, "clip_ratio/region_mean": 0.001123421603551833, "epoch": 0.07345314970045909, "grad_norm": 0.13153524696826935, "learning_rate": 2e-07, "loss": 0.0264, "step": 787 }, { "clip_ratio/high_max": 0.001768229736626381, "clip_ratio/high_mean": 0.000666464155074209, "clip_ratio/low_mean": 0.0006321265200313064, "clip_ratio/low_min": 5.978787885396741e-05, "clip_ratio/region_mean": 0.0012985906651010737, "epoch": 0.07354648280045967, "grad_norm": 0.13322320580482483, "learning_rate": 2e-07, "loss": 0.0572, "step": 788 }, { "clip_ratio/high_max": 0.0018353796331211925, "clip_ratio/high_mean": 0.0006070550098229432, "clip_ratio/low_mean": 0.0005546537267946405, "clip_ratio/low_min": 1.0217427188763395e-05, "clip_ratio/region_mean": 0.0011617087184276897, "epoch": 0.07363981590046025, "grad_norm": 65.43255615234375, "learning_rate": 2e-07, "loss": 0.0636, "step": 789 }, { "clip_ratio/high_max": 0.001764729244314367, "clip_ratio/high_mean": 0.0006605897297049523, "clip_ratio/low_mean": 0.0005762765531471814, "clip_ratio/low_min": 1.3343134014576208e-05, "clip_ratio/region_mean": 0.0012368662683002185, "epoch": 0.07373314900046084, "grad_norm": 0.13482068479061127, "learning_rate": 2e-07, "loss": 0.0425, "step": 790 }, { "clip_ratio/high_max": 0.0014388743256859016, "clip_ratio/high_mean": 0.00052253001013014, "clip_ratio/low_mean": 0.0005727292355004465, "clip_ratio/low_min": 6.32050041531329e-05, "clip_ratio/region_mean": 0.0010952592419926077, "epoch": 0.07382648210046142, "grad_norm": 0.11618407815694809, "learning_rate": 2e-07, "loss": 0.0309, "step": 791 }, { "clip_ratio/high_max": 0.0016239406540989876, "clip_ratio/high_mean": 0.0007450235389114823, "clip_ratio/low_mean": 0.0005221978426561691, "clip_ratio/low_min": 1.3851324638380902e-05, "clip_ratio/region_mean": 0.0012672213488258421, "epoch": 0.073919815200462, "grad_norm": 0.13668769598007202, "learning_rate": 2e-07, "loss": -0.013, "step": 792 }, { "clip_ratio/high_max": 0.001792459050193429, "clip_ratio/high_mean": 0.0007293201470019994, "clip_ratio/low_mean": 0.0005229202092777996, "clip_ratio/low_min": 5.167751442058943e-05, "clip_ratio/region_mean": 0.0012522403922048397, "epoch": 0.07401314830046259, "grad_norm": 0.15190726518630981, "learning_rate": 2e-07, "loss": 0.0099, "step": 793 }, { "clip_ratio/high_max": 0.0016551038643228821, "clip_ratio/high_mean": 0.000632282426522579, "clip_ratio/low_mean": 0.0005150629094714532, "clip_ratio/low_min": 1.3241525266494136e-05, "clip_ratio/region_mean": 0.0011473453196231276, "epoch": 0.07410648140046316, "grad_norm": 0.13538256287574768, "learning_rate": 2e-07, "loss": 0.0317, "step": 794 }, { "clip_ratio/high_max": 0.0015021612744021695, "clip_ratio/high_mean": 0.0005892269764444791, "clip_ratio/low_mean": 0.0005791236362711061, "clip_ratio/low_min": 2.0741913431265857e-05, "clip_ratio/region_mean": 0.0011683506199915428, "epoch": 0.07419981450046374, "grad_norm": 0.13134124875068665, "learning_rate": 2e-07, "loss": 0.0637, "step": 795 }, { "clip_ratio/high_max": 0.0015224536873574834, "clip_ratio/high_mean": 0.0006306667846729397, "clip_ratio/low_mean": 0.0006323348025034647, "clip_ratio/low_min": 6.654258049820783e-05, "clip_ratio/region_mean": 0.001263001560801058, "epoch": 0.07429314760046433, "grad_norm": 0.12769021093845367, "learning_rate": 2e-07, "loss": 0.0623, "step": 796 }, { "clip_ratio/high_max": 0.0019472003514238168, "clip_ratio/high_mean": 0.0006880204455228522, "clip_ratio/low_mean": 0.0005959361151326448, "clip_ratio/low_min": 4.098026738574845e-05, "clip_ratio/region_mean": 0.0012839565824833699, "epoch": 0.07438648070046491, "grad_norm": 0.1261204332113266, "learning_rate": 2e-07, "loss": 0.0118, "step": 797 }, { "clip_ratio/high_max": 0.001734211400616914, "clip_ratio/high_mean": 0.0006724927679897519, "clip_ratio/low_mean": 0.0005937544665357564, "clip_ratio/low_min": 7.51308962207986e-05, "clip_ratio/region_mean": 0.0012662472327065188, "epoch": 0.0744798138004655, "grad_norm": 0.1381601244211197, "learning_rate": 2e-07, "loss": 0.0085, "step": 798 }, { "clip_ratio/high_max": 0.0017473062398494221, "clip_ratio/high_mean": 0.0007460927481588442, "clip_ratio/low_mean": 0.000571088321521529, "clip_ratio/low_min": 4.5364177822193597e-05, "clip_ratio/region_mean": 0.0013171810387575533, "epoch": 0.07457314690046608, "grad_norm": 0.12908893823623657, "learning_rate": 2e-07, "loss": 0.049, "step": 799 }, { "clip_ratio/high_max": 0.001583841149113141, "clip_ratio/high_mean": 0.0006087311021474306, "clip_ratio/low_mean": 0.0005442203791972133, "clip_ratio/low_min": 7.766869202896487e-06, "clip_ratio/region_mean": 0.0011529514704307076, "epoch": 0.07466648000046666, "grad_norm": 0.12730640172958374, "learning_rate": 2e-07, "loss": 0.0448, "step": 800 }, { "clip_ratio/high_max": 0.0015075810879352503, "clip_ratio/high_mean": 0.0005928674850110838, "clip_ratio/low_mean": 0.0004950751208525617, "clip_ratio/low_min": 1.340626386081567e-05, "clip_ratio/region_mean": 0.001087942593585467, "epoch": 0.07475981310046725, "grad_norm": 0.13703522086143494, "learning_rate": 2e-07, "loss": 0.034, "step": 801 }, { "clip_ratio/high_max": 0.0019027512244065292, "clip_ratio/high_mean": 0.0007680161215830594, "clip_ratio/low_mean": 0.0005871154407941503, "clip_ratio/low_min": 7.348618510150118e-06, "clip_ratio/region_mean": 0.0013551315969380084, "epoch": 0.07485314620046783, "grad_norm": 0.13827338814735413, "learning_rate": 2e-07, "loss": 0.016, "step": 802 }, { "clip_ratio/high_max": 0.0018460044448147528, "clip_ratio/high_mean": 0.0007641971424163785, "clip_ratio/low_mean": 0.0005808871537738014, "clip_ratio/low_min": 4.96315406053327e-05, "clip_ratio/region_mean": 0.0013450842780002858, "epoch": 0.07494647930046841, "grad_norm": 0.13513021171092987, "learning_rate": 2e-07, "loss": 0.0117, "step": 803 }, { "clip_ratio/high_max": 0.0016978867060970515, "clip_ratio/high_mean": 0.0007051718757793424, "clip_ratio/low_mean": 0.0005716891691918136, "clip_ratio/low_min": 2.6544778847892303e-05, "clip_ratio/region_mean": 0.001276861054066103, "epoch": 0.075039812400469, "grad_norm": 0.11753594130277634, "learning_rate": 2e-07, "loss": 0.029, "step": 804 }, { "clip_ratio/high_max": 0.0017336773889837787, "clip_ratio/high_mean": 0.0006946772664377932, "clip_ratio/low_mean": 0.0007058975515974453, "clip_ratio/low_min": 3.879847463394981e-05, "clip_ratio/region_mean": 0.0014005748089402914, "epoch": 0.07513314550046958, "grad_norm": 0.2023734301328659, "learning_rate": 2e-07, "loss": 0.0475, "step": 805 }, { "clip_ratio/high_max": 0.0016371474011975806, "clip_ratio/high_mean": 0.0006527393597934861, "clip_ratio/low_mean": 0.0005765201185568003, "clip_ratio/low_min": 1.821468913476565e-05, "clip_ratio/region_mean": 0.0012292595019971486, "epoch": 0.07522647860047016, "grad_norm": 0.11682592332363129, "learning_rate": 2e-07, "loss": 0.0137, "step": 806 }, { "clip_ratio/high_max": 0.0018218369878013618, "clip_ratio/high_mean": 0.0007625634534633718, "clip_ratio/low_mean": 0.0005656765097228345, "clip_ratio/low_min": 3.354207910888363e-05, "clip_ratio/region_mean": 0.001328239970462164, "epoch": 0.07531981170047075, "grad_norm": 0.17023275792598724, "learning_rate": 2e-07, "loss": 0.0284, "step": 807 }, { "clip_ratio/high_max": 0.0018063690622511785, "clip_ratio/high_mean": 0.0006768521107005654, "clip_ratio/low_mean": 0.0004902261634924798, "clip_ratio/low_min": 1.7428766568627907e-05, "clip_ratio/region_mean": 0.0011670782514556777, "epoch": 0.07541314480047133, "grad_norm": 0.1293930858373642, "learning_rate": 2e-07, "loss": 0.0438, "step": 808 }, { "clip_ratio/high_max": 0.001562705099786399, "clip_ratio/high_mean": 0.0006287382111622719, "clip_ratio/low_mean": 0.0005736205957873608, "clip_ratio/low_min": 3.597545855882345e-05, "clip_ratio/region_mean": 0.0012023587769363075, "epoch": 0.07550647790047192, "grad_norm": 0.12865273654460907, "learning_rate": 2e-07, "loss": 0.0381, "step": 809 }, { "clip_ratio/high_max": 0.0014954010766814463, "clip_ratio/high_mean": 0.0005929134431426064, "clip_ratio/low_mean": 0.0005462050958158216, "clip_ratio/low_min": 3.391011523490306e-05, "clip_ratio/region_mean": 0.0011391185471438803, "epoch": 0.0755998110004725, "grad_norm": 0.1434391736984253, "learning_rate": 2e-07, "loss": 0.0626, "step": 810 }, { "clip_ratio/high_max": 0.001866494865680579, "clip_ratio/high_mean": 0.0007073636807035655, "clip_ratio/low_mean": 0.0006035722271917621, "clip_ratio/low_min": 4.886163333139848e-05, "clip_ratio/region_mean": 0.0013109359315421898, "epoch": 0.07569314410047308, "grad_norm": 0.15212330222129822, "learning_rate": 2e-07, "loss": 0.0444, "step": 811 }, { "clip_ratio/high_max": 0.0017575124693394173, "clip_ratio/high_mean": 0.0007391291783278575, "clip_ratio/low_mean": 0.0005912526721658651, "clip_ratio/low_min": 2.2563160200661514e-05, "clip_ratio/region_mean": 0.0013303818413987756, "epoch": 0.07578647720047367, "grad_norm": 0.1445869654417038, "learning_rate": 2e-07, "loss": 0.0394, "step": 812 }, { "clip_ratio/high_max": 0.0016174005722859874, "clip_ratio/high_mean": 0.0006395413811333128, "clip_ratio/low_mean": 0.000532825183654495, "clip_ratio/low_min": 1.6974470781860873e-05, "clip_ratio/region_mean": 0.001172366552054882, "epoch": 0.07587981030047425, "grad_norm": 0.13306879997253418, "learning_rate": 2e-07, "loss": 0.0018, "step": 813 }, { "clip_ratio/high_max": 0.0018803581697284244, "clip_ratio/high_mean": 0.0006576155683433171, "clip_ratio/low_mean": 0.000547018185898196, "clip_ratio/low_min": 2.537104592192918e-05, "clip_ratio/region_mean": 0.001204633754241513, "epoch": 0.07597314340047483, "grad_norm": 0.1316138356924057, "learning_rate": 2e-07, "loss": 0.035, "step": 814 }, { "clip_ratio/high_max": 0.0017649402070674114, "clip_ratio/high_mean": 0.0007271963440871332, "clip_ratio/low_mean": 0.0006332378252409399, "clip_ratio/low_min": 3.3307031117146835e-05, "clip_ratio/region_mean": 0.0013604341620521154, "epoch": 0.07606647650047542, "grad_norm": 0.13097789883613586, "learning_rate": 2e-07, "loss": -0.0045, "step": 815 }, { "clip_ratio/high_max": 0.0019263663198216818, "clip_ratio/high_mean": 0.0007784564841131214, "clip_ratio/low_mean": 0.0006007261818012921, "clip_ratio/low_min": 1.2354220416455064e-05, "clip_ratio/region_mean": 0.001379182685923297, "epoch": 0.076159809600476, "grad_norm": 0.13646312057971954, "learning_rate": 2e-07, "loss": 0.0056, "step": 816 }, { "clip_ratio/high_max": 0.0018164188477385323, "clip_ratio/high_mean": 0.0006681560880679172, "clip_ratio/low_mean": 0.0006493937935374561, "clip_ratio/low_min": 4.98636691190768e-05, "clip_ratio/region_mean": 0.0013175498897908255, "epoch": 0.07625314270047658, "grad_norm": 0.13613185286521912, "learning_rate": 2e-07, "loss": 0.0688, "step": 817 }, { "clip_ratio/high_max": 0.0013788427932013292, "clip_ratio/high_mean": 0.000611609628322185, "clip_ratio/low_mean": 0.0006212674070411595, "clip_ratio/low_min": 3.884965644829208e-05, "clip_ratio/region_mean": 0.0012328770390013233, "epoch": 0.07634647580047717, "grad_norm": 0.12307032942771912, "learning_rate": 2e-07, "loss": 0.037, "step": 818 }, { "clip_ratio/high_max": 0.002161408028769074, "clip_ratio/high_mean": 0.0008264908301498508, "clip_ratio/low_mean": 0.0005874209418834653, "clip_ratio/low_min": 1.5090803117345786e-05, "clip_ratio/region_mean": 0.0014139117483864538, "epoch": 0.07643980890047775, "grad_norm": 0.13738411664962769, "learning_rate": 2e-07, "loss": 0.0343, "step": 819 }, { "clip_ratio/high_max": 0.001655316460528411, "clip_ratio/high_mean": 0.0005889028934689122, "clip_ratio/low_mean": 0.0006206248781381873, "clip_ratio/low_min": 4.959434409101959e-05, "clip_ratio/region_mean": 0.0012095277743355837, "epoch": 0.07653314200047834, "grad_norm": 0.1456535905599594, "learning_rate": 2e-07, "loss": 0.0566, "step": 820 }, { "clip_ratio/high_max": 0.0014790482928219717, "clip_ratio/high_mean": 0.0005915695764997508, "clip_ratio/low_mean": 0.0006586634935956681, "clip_ratio/low_min": 4.391547417981201e-05, "clip_ratio/region_mean": 0.0012502330719144084, "epoch": 0.07662647510047892, "grad_norm": 0.12696410715579987, "learning_rate": 2e-07, "loss": 0.0455, "step": 821 }, { "clip_ratio/high_max": 0.001555921964609297, "clip_ratio/high_mean": 0.0006570272526005283, "clip_ratio/low_mean": 0.0006139335309853777, "clip_ratio/low_min": 3.152605677314568e-05, "clip_ratio/region_mean": 0.0012709607617580332, "epoch": 0.0767198082004795, "grad_norm": 0.12894754111766815, "learning_rate": 2e-07, "loss": 0.0272, "step": 822 }, { "clip_ratio/high_max": 0.0014002352254465222, "clip_ratio/high_mean": 0.0006081001383790863, "clip_ratio/low_mean": 0.0005030052043366595, "clip_ratio/low_min": 5.296463177728583e-05, "clip_ratio/region_mean": 0.0011111053354397882, "epoch": 0.07681314130048009, "grad_norm": 0.12044192850589752, "learning_rate": 2e-07, "loss": 0.0091, "step": 823 }, { "clip_ratio/high_max": 0.0014157791265461128, "clip_ratio/high_mean": 0.0005488197130034678, "clip_ratio/low_mean": 0.0006328679864964215, "clip_ratio/low_min": 3.568117699614959e-05, "clip_ratio/region_mean": 0.001181687715870794, "epoch": 0.07690647440048066, "grad_norm": 0.12149026989936829, "learning_rate": 2e-07, "loss": 0.0641, "step": 824 }, { "clip_ratio/high_max": 0.0016838320625538472, "clip_ratio/high_mean": 0.0006304349844867829, "clip_ratio/low_mean": 0.0006053441138647031, "clip_ratio/low_min": 2.5909308078553295e-05, "clip_ratio/region_mean": 0.0012357791201793589, "epoch": 0.07699980750048124, "grad_norm": 0.13744395971298218, "learning_rate": 2e-07, "loss": 0.0477, "step": 825 }, { "clip_ratio/high_max": 0.0016691182463546284, "clip_ratio/high_mean": 0.0006874650243844371, "clip_ratio/low_mean": 0.0005815828972117743, "clip_ratio/low_min": 2.5036773877218366e-05, "clip_ratio/region_mean": 0.0012690479125012644, "epoch": 0.07709314060048184, "grad_norm": 0.20148490369319916, "learning_rate": 2e-07, "loss": 0.0438, "step": 826 }, { "clip_ratio/high_max": 0.0018083069153362885, "clip_ratio/high_mean": 0.0007352850598181249, "clip_ratio/low_mean": 0.00063953405515349, "clip_ratio/low_min": 4.0536031519877724e-05, "clip_ratio/region_mean": 0.0013748191340710036, "epoch": 0.07718647370048241, "grad_norm": 0.12896673381328583, "learning_rate": 2e-07, "loss": 0.0193, "step": 827 }, { "clip_ratio/high_max": 0.002085554788209265, "clip_ratio/high_mean": 0.0007741647568764165, "clip_ratio/low_mean": 0.0006427722582884599, "clip_ratio/low_min": 2.2887622435519006e-05, "clip_ratio/region_mean": 0.0014169370551826432, "epoch": 0.07727980680048299, "grad_norm": 0.12418148666620255, "learning_rate": 2e-07, "loss": 0.0451, "step": 828 }, { "clip_ratio/high_max": 0.001683254464296624, "clip_ratio/high_mean": 0.0006877275372971781, "clip_ratio/low_mean": 0.0006713811671943404, "clip_ratio/low_min": 2.333029351575533e-05, "clip_ratio/region_mean": 0.001359108733595349, "epoch": 0.07737313990048358, "grad_norm": 0.1289340853691101, "learning_rate": 2e-07, "loss": 0.052, "step": 829 }, { "clip_ratio/high_max": 0.0017290256982960273, "clip_ratio/high_mean": 0.0006129594457888743, "clip_ratio/low_mean": 0.0005570537887251703, "clip_ratio/low_min": 1.430860811524326e-05, "clip_ratio/region_mean": 0.0011700132417900022, "epoch": 0.07746647300048416, "grad_norm": 0.12778276205062866, "learning_rate": 2e-07, "loss": -0.0117, "step": 830 }, { "clip_ratio/high_max": 0.0019265570081188343, "clip_ratio/high_mean": 0.000739794782930403, "clip_ratio/low_mean": 0.0006098155099607538, "clip_ratio/low_min": 1.3625462997879367e-05, "clip_ratio/region_mean": 0.0013496102874341886, "epoch": 0.07755980610048475, "grad_norm": 0.13921457529067993, "learning_rate": 2e-07, "loss": -0.0001, "step": 831 }, { "clip_ratio/high_max": 0.0017783025687094778, "clip_ratio/high_mean": 0.0007399083333439194, "clip_ratio/low_mean": 0.0006000764351483667, "clip_ratio/low_min": 4.019117295683827e-05, "clip_ratio/region_mean": 0.0013399847630353179, "epoch": 0.07765313920048533, "grad_norm": 0.12935948371887207, "learning_rate": 2e-07, "loss": 0.0075, "step": 832 }, { "clip_ratio/high_max": 0.0018224665182060562, "clip_ratio/high_mean": 0.000690434679199825, "clip_ratio/low_mean": 0.0006447398927775794, "clip_ratio/low_min": 4.348593256509048e-05, "clip_ratio/region_mean": 0.001335174594714772, "epoch": 0.07774647230048591, "grad_norm": 0.13611753284931183, "learning_rate": 2e-07, "loss": 0.0346, "step": 833 }, { "clip_ratio/high_max": 0.0017993394139921293, "clip_ratio/high_mean": 0.0007234451495605754, "clip_ratio/low_mean": 0.0006524875216200599, "clip_ratio/low_min": 7.403199288091855e-05, "clip_ratio/region_mean": 0.0013759326284343842, "epoch": 0.0778398054004865, "grad_norm": 0.1413203477859497, "learning_rate": 2e-07, "loss": 0.0374, "step": 834 }, { "clip_ratio/high_max": 0.001584028846991714, "clip_ratio/high_mean": 0.0006908150317030959, "clip_ratio/low_mean": 0.0006102843035478145, "clip_ratio/low_min": 6.74017219353118e-05, "clip_ratio/region_mean": 0.001301099324336974, "epoch": 0.07793313850048708, "grad_norm": 0.14717626571655273, "learning_rate": 2e-07, "loss": 0.0372, "step": 835 }, { "clip_ratio/high_max": 0.0015361522528110072, "clip_ratio/high_mean": 0.0006697593089484144, "clip_ratio/low_mean": 0.0007000555451668333, "clip_ratio/low_min": 6.222843421710422e-05, "clip_ratio/region_mean": 0.0013698148650291841, "epoch": 0.07802647160048766, "grad_norm": 0.13847097754478455, "learning_rate": 2e-07, "loss": 0.0679, "step": 836 }, { "clip_ratio/high_max": 0.0017886962668853812, "clip_ratio/high_mean": 0.0007371825886366423, "clip_ratio/low_mean": 0.0005855020826857071, "clip_ratio/low_min": 5.6827787375368644e-05, "clip_ratio/region_mean": 0.0013226846676843707, "epoch": 0.07811980470048825, "grad_norm": 0.1522608995437622, "learning_rate": 2e-07, "loss": 0.0296, "step": 837 }, { "clip_ratio/high_max": 0.0015483665993087925, "clip_ratio/high_mean": 0.0006435153063648613, "clip_ratio/low_mean": 0.0005492567715918995, "clip_ratio/low_min": 3.705377639562357e-05, "clip_ratio/region_mean": 0.0011927720552193932, "epoch": 0.07821313780048883, "grad_norm": 0.13875828683376312, "learning_rate": 2e-07, "loss": 0.0453, "step": 838 }, { "clip_ratio/high_max": 0.0016914433545025531, "clip_ratio/high_mean": 0.0006929924966243561, "clip_ratio/low_mean": 0.0006704943316435674, "clip_ratio/low_min": 5.085479915578617e-05, "clip_ratio/region_mean": 0.0013634868191729765, "epoch": 0.07830647090048942, "grad_norm": 0.13252674043178558, "learning_rate": 2e-07, "loss": 0.0263, "step": 839 }, { "clip_ratio/high_max": 0.001797823129891185, "clip_ratio/high_mean": 0.0006902333789184922, "clip_ratio/low_mean": 0.0005146300554770278, "clip_ratio/low_min": 3.8217634028114844e-05, "clip_ratio/region_mean": 0.0012048634453094564, "epoch": 0.07839980400049, "grad_norm": 0.13485558331012726, "learning_rate": 2e-07, "loss": 0.03, "step": 840 }, { "clip_ratio/high_max": 0.0017605207067390438, "clip_ratio/high_mean": 0.0006810703071096214, "clip_ratio/low_mean": 0.0005693670418622787, "clip_ratio/low_min": 2.475845394656062e-05, "clip_ratio/region_mean": 0.001250437355338363, "epoch": 0.07849313710049058, "grad_norm": 0.1558636724948883, "learning_rate": 2e-07, "loss": 0.0377, "step": 841 }, { "clip_ratio/high_max": 0.001225426221935777, "clip_ratio/high_mean": 0.0005472269513120409, "clip_ratio/low_mean": 0.0005325346273821197, "clip_ratio/low_min": 2.735143334575696e-05, "clip_ratio/region_mean": 0.0010797615723276976, "epoch": 0.07858647020049117, "grad_norm": 0.12045590579509735, "learning_rate": 2e-07, "loss": 0.0297, "step": 842 }, { "clip_ratio/high_max": 0.001892310603579972, "clip_ratio/high_mean": 0.0008108117772280821, "clip_ratio/low_mean": 0.0005895059375689016, "clip_ratio/low_min": 3.2782509151729755e-05, "clip_ratio/region_mean": 0.0014003177020640578, "epoch": 0.07867980330049175, "grad_norm": 0.14394766092300415, "learning_rate": 2e-07, "loss": 0.008, "step": 843 }, { "clip_ratio/high_max": 0.0015954730024532182, "clip_ratio/high_mean": 0.0006656981067862944, "clip_ratio/low_mean": 0.0005600689346465515, "clip_ratio/low_min": 6.268016750254901e-05, "clip_ratio/region_mean": 0.0012257670605322346, "epoch": 0.07877313640049233, "grad_norm": 0.14831216633319855, "learning_rate": 2e-07, "loss": 0.0364, "step": 844 }, { "clip_ratio/high_max": 0.002108348962792661, "clip_ratio/high_mean": 0.0007428614298987668, "clip_ratio/low_mean": 0.0006216907886482659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013645522230945062, "epoch": 0.07886646950049292, "grad_norm": 0.1329721361398697, "learning_rate": 2e-07, "loss": -0.0198, "step": 845 }, { "clip_ratio/high_max": 0.0015819431573618203, "clip_ratio/high_mean": 0.0006584189086424885, "clip_ratio/low_mean": 0.0006012320409354288, "clip_ratio/low_min": 6.011156438034959e-05, "clip_ratio/region_mean": 0.0012596509368449915, "epoch": 0.0789598026004935, "grad_norm": 0.14300298690795898, "learning_rate": 2e-07, "loss": 0.039, "step": 846 }, { "clip_ratio/high_max": 0.0016432298325526062, "clip_ratio/high_mean": 0.0007691864539083326, "clip_ratio/low_mean": 0.0005871288349226234, "clip_ratio/low_min": 8.327190334966872e-05, "clip_ratio/region_mean": 0.0013563152715505566, "epoch": 0.07905313570049408, "grad_norm": 1.0942280292510986, "learning_rate": 2e-07, "loss": 0.0337, "step": 847 }, { "clip_ratio/high_max": 0.002003811692702584, "clip_ratio/high_mean": 0.0008154770657711197, "clip_ratio/low_mean": 0.0006275693067436805, "clip_ratio/low_min": 6.367930200212868e-05, "clip_ratio/region_mean": 0.0014430463306780439, "epoch": 0.07914646880049467, "grad_norm": 0.12842611968517303, "learning_rate": 2e-07, "loss": -0.0011, "step": 848 }, { "clip_ratio/high_max": 0.0023378841069643386, "clip_ratio/high_mean": 0.0008084991222858662, "clip_ratio/low_mean": 0.0005760022086178651, "clip_ratio/low_min": 1.062383125827182e-05, "clip_ratio/region_mean": 0.0013845013636455406, "epoch": 0.07923980190049525, "grad_norm": 0.1393127292394638, "learning_rate": 2e-07, "loss": -0.0049, "step": 849 }, { "clip_ratio/high_max": 0.001980731678486336, "clip_ratio/high_mean": 0.0007093695840012515, "clip_ratio/low_mean": 0.0006512841009680415, "clip_ratio/low_min": 7.900724313003593e-05, "clip_ratio/region_mean": 0.0013606537104351446, "epoch": 0.07933313500049584, "grad_norm": 0.1291321963071823, "learning_rate": 2e-07, "loss": 0.0587, "step": 850 }, { "clip_ratio/high_max": 0.0020913483822369017, "clip_ratio/high_mean": 0.0007465839371434413, "clip_ratio/low_mean": 0.0006139702927612234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013605542371806223, "epoch": 0.07942646810049642, "grad_norm": 0.13774874806404114, "learning_rate": 2e-07, "loss": 0.0335, "step": 851 }, { "clip_ratio/high_max": 0.0016417938531958498, "clip_ratio/high_mean": 0.0006433386115531903, "clip_ratio/low_mean": 0.0007103466268745251, "clip_ratio/low_min": 0.0001104758148358087, "clip_ratio/region_mean": 0.0013536852493416518, "epoch": 0.079519801200497, "grad_norm": 0.13146109879016876, "learning_rate": 2e-07, "loss": 0.0146, "step": 852 }, { "clip_ratio/high_max": 0.0016960887514869682, "clip_ratio/high_mean": 0.0006667942943749949, "clip_ratio/low_mean": 0.0006191753309394699, "clip_ratio/low_min": 2.8034493880113587e-05, "clip_ratio/region_mean": 0.0012859696144005284, "epoch": 0.07961313430049759, "grad_norm": 0.14730513095855713, "learning_rate": 2e-07, "loss": 0.0516, "step": 853 }, { "clip_ratio/high_max": 0.0018062445415125694, "clip_ratio/high_mean": 0.0007630339205206838, "clip_ratio/low_mean": 0.00066736639746523, "clip_ratio/low_min": 4.1812202653090935e-05, "clip_ratio/region_mean": 0.001430400265235221, "epoch": 0.07970646740049817, "grad_norm": 0.1876087635755539, "learning_rate": 2e-07, "loss": 0.0382, "step": 854 }, { "clip_ratio/high_max": 0.0018276439514011145, "clip_ratio/high_mean": 0.0007223620868899161, "clip_ratio/low_mean": 0.0005319120364219998, "clip_ratio/low_min": 3.890041625709273e-05, "clip_ratio/region_mean": 0.0012542741496872623, "epoch": 0.07979980050049874, "grad_norm": 0.3433104455471039, "learning_rate": 2e-07, "loss": 0.0101, "step": 855 }, { "clip_ratio/high_max": 0.0016408365445386153, "clip_ratio/high_mean": 0.0006067716803954681, "clip_ratio/low_mean": 0.0006541430766446865, "clip_ratio/low_min": 3.3152620744658634e-05, "clip_ratio/region_mean": 0.0012609147634066176, "epoch": 0.07989313360049934, "grad_norm": 0.15551145374774933, "learning_rate": 2e-07, "loss": 0.0506, "step": 856 }, { "clip_ratio/high_max": 0.001711485194391571, "clip_ratio/high_mean": 0.0008184422476915643, "clip_ratio/low_mean": 0.0005334352727004443, "clip_ratio/low_min": 5.369946939026704e-05, "clip_ratio/region_mean": 0.0013518775303964503, "epoch": 0.07998646670049991, "grad_norm": 0.14987154304981232, "learning_rate": 2e-07, "loss": 0.0044, "step": 857 }, { "clip_ratio/high_max": 0.0015381181401608046, "clip_ratio/high_mean": 0.0006723325132043101, "clip_ratio/low_mean": 0.0006101987401052611, "clip_ratio/low_min": 1.3241525266494136e-05, "clip_ratio/region_mean": 0.001282531302422285, "epoch": 0.08007979980050049, "grad_norm": 0.12890847027301788, "learning_rate": 2e-07, "loss": 0.0486, "step": 858 }, { "clip_ratio/high_max": 0.0018899553833762184, "clip_ratio/high_mean": 0.0008515239642292727, "clip_ratio/low_mean": 0.0006195432670210721, "clip_ratio/low_min": 1.5033072486403398e-05, "clip_ratio/region_mean": 0.0014710672257933766, "epoch": 0.08017313290050108, "grad_norm": 0.5196543335914612, "learning_rate": 2e-07, "loss": 0.0174, "step": 859 }, { "clip_ratio/high_max": 0.001675735657045152, "clip_ratio/high_mean": 0.0006546287941091578, "clip_ratio/low_mean": 0.0007412694076265325, "clip_ratio/low_min": 0.00011557375773918466, "clip_ratio/region_mean": 0.0013958981980977114, "epoch": 0.08026646600050166, "grad_norm": 0.14806358516216278, "learning_rate": 2e-07, "loss": 0.0824, "step": 860 }, { "clip_ratio/high_max": 0.002012482080317568, "clip_ratio/high_mean": 0.0007685529762966326, "clip_ratio/low_mean": 0.0007797358339303173, "clip_ratio/low_min": 9.013973067339975e-05, "clip_ratio/region_mean": 0.0015482887865800876, "epoch": 0.08035979910050225, "grad_norm": 0.1390140801668167, "learning_rate": 2e-07, "loss": 0.0786, "step": 861 }, { "clip_ratio/high_max": 0.001979123044293374, "clip_ratio/high_mean": 0.0008341717712028185, "clip_ratio/low_mean": 0.0005610369353235001, "clip_ratio/low_min": 1.775568125594873e-05, "clip_ratio/region_mean": 0.0013952087028883398, "epoch": 0.08045313220050283, "grad_norm": 0.14272533357143402, "learning_rate": 2e-07, "loss": -0.0057, "step": 862 }, { "clip_ratio/high_max": 0.0018688918753468897, "clip_ratio/high_mean": 0.0007769048606860451, "clip_ratio/low_mean": 0.0007264294918059022, "clip_ratio/low_min": 1.4714797998749418e-05, "clip_ratio/region_mean": 0.0015033343188406434, "epoch": 0.08054646530050341, "grad_norm": 0.15064172446727753, "learning_rate": 2e-07, "loss": 0.0556, "step": 863 }, { "clip_ratio/high_max": 0.0019289929732622113, "clip_ratio/high_mean": 0.0006883197856950574, "clip_ratio/low_mean": 0.0006164780979815987, "clip_ratio/low_min": 7.705895131948637e-05, "clip_ratio/region_mean": 0.0013047978936810978, "epoch": 0.080639798400504, "grad_norm": 0.14954252541065216, "learning_rate": 2e-07, "loss": 0.0629, "step": 864 }, { "clip_ratio/high_max": 0.0017198956957145128, "clip_ratio/high_mean": 0.0006002512454870157, "clip_ratio/low_mean": 0.0006767428003513487, "clip_ratio/low_min": 3.0250856980273966e-05, "clip_ratio/region_mean": 0.0012769940112775657, "epoch": 0.08073313150050458, "grad_norm": 0.1485782265663147, "learning_rate": 2e-07, "loss": 0.0776, "step": 865 }, { "clip_ratio/high_max": 0.001844557893491583, "clip_ratio/high_mean": 0.0006985152731431299, "clip_ratio/low_mean": 0.000572817625652533, "clip_ratio/low_min": 1.9512285689415876e-05, "clip_ratio/region_mean": 0.0012713329160760622, "epoch": 0.08082646460050516, "grad_norm": 0.12726767361164093, "learning_rate": 2e-07, "loss": 0.0308, "step": 866 }, { "clip_ratio/high_max": 0.001900109571579378, "clip_ratio/high_mean": 0.0007454441038134973, "clip_ratio/low_mean": 0.0006937586458661826, "clip_ratio/low_min": 7.015682240307797e-05, "clip_ratio/region_mean": 0.0014392027514986694, "epoch": 0.08091979770050575, "grad_norm": 0.12965478003025055, "learning_rate": 2e-07, "loss": 0.0143, "step": 867 }, { "clip_ratio/high_max": 0.001997934221435571, "clip_ratio/high_mean": 0.0008330866985488683, "clip_ratio/low_mean": 0.0005939876173215453, "clip_ratio/low_min": 7.09348896634765e-05, "clip_ratio/region_mean": 0.0014270743304223288, "epoch": 0.08101313080050633, "grad_norm": 0.14231403172016144, "learning_rate": 2e-07, "loss": -0.0501, "step": 868 }, { "clip_ratio/high_max": 0.0017240982197108679, "clip_ratio/high_mean": 0.0007752552810416091, "clip_ratio/low_mean": 0.0005868253047083272, "clip_ratio/low_min": 2.007709554163739e-05, "clip_ratio/region_mean": 0.0013620805875689257, "epoch": 0.08110646390050691, "grad_norm": 0.13820980489253998, "learning_rate": 2e-07, "loss": 0.0259, "step": 869 }, { "clip_ratio/high_max": 0.0019745566314668395, "clip_ratio/high_mean": 0.0007456436433130875, "clip_ratio/low_mean": 0.0006063898872525897, "clip_ratio/low_min": 2.5769651074369904e-05, "clip_ratio/region_mean": 0.0013520335451175924, "epoch": 0.0811997970005075, "grad_norm": 0.14774057269096375, "learning_rate": 2e-07, "loss": -0.0127, "step": 870 }, { "clip_ratio/high_max": 0.0017938436831173021, "clip_ratio/high_mean": 0.0007533786119893193, "clip_ratio/low_mean": 0.0006193013759911992, "clip_ratio/low_min": 6.230970575415995e-05, "clip_ratio/region_mean": 0.001372679998894455, "epoch": 0.08129313010050808, "grad_norm": 0.13127848505973816, "learning_rate": 2e-07, "loss": 0.0167, "step": 871 }, { "clip_ratio/high_max": 0.0016195896459976211, "clip_ratio/high_mean": 0.0006557989345310489, "clip_ratio/low_mean": 0.0006329556072159903, "clip_ratio/low_min": 2.4389313693973236e-05, "clip_ratio/region_mean": 0.0012887545053672511, "epoch": 0.08138646320050867, "grad_norm": 0.14775978028774261, "learning_rate": 2e-07, "loss": 0.0072, "step": 872 }, { "clip_ratio/high_max": 0.002060043469100492, "clip_ratio/high_mean": 0.0008124862033582758, "clip_ratio/low_mean": 0.0006765411544620292, "clip_ratio/low_min": 4.907749462290667e-05, "clip_ratio/region_mean": 0.0014890273596392944, "epoch": 0.08147979630050925, "grad_norm": 0.1471288800239563, "learning_rate": 2e-07, "loss": 0.0266, "step": 873 }, { "clip_ratio/high_max": 0.0014006509009050205, "clip_ratio/high_mean": 0.000629991080131731, "clip_ratio/low_mean": 0.0006208190370671218, "clip_ratio/low_min": 1.1877613360411488e-05, "clip_ratio/region_mean": 0.0012508101353887469, "epoch": 0.08157312940050983, "grad_norm": 0.1477607935667038, "learning_rate": 2e-07, "loss": 0.0438, "step": 874 }, { "clip_ratio/high_max": 0.0018564271194918547, "clip_ratio/high_mean": 0.0007484257548640016, "clip_ratio/low_mean": 0.0006713709681207547, "clip_ratio/low_min": 6.827870947745396e-05, "clip_ratio/region_mean": 0.001419796721165767, "epoch": 0.08166646250051042, "grad_norm": 0.14852215349674225, "learning_rate": 2e-07, "loss": 0.002, "step": 875 }, { "clip_ratio/high_max": 0.0017884806729853153, "clip_ratio/high_mean": 0.0006731024604960112, "clip_ratio/low_mean": 0.0006758268045814475, "clip_ratio/low_min": 3.679217661556322e-05, "clip_ratio/region_mean": 0.0013489292105077766, "epoch": 0.081759795600511, "grad_norm": 0.15335319936275482, "learning_rate": 2e-07, "loss": 0.0503, "step": 876 }, { "clip_ratio/high_max": 0.0019437867013039067, "clip_ratio/high_mean": 0.000774052869019215, "clip_ratio/low_mean": 0.0006866722815175308, "clip_ratio/low_min": 3.823641600320116e-05, "clip_ratio/region_mean": 0.001460725128708873, "epoch": 0.08185312870051158, "grad_norm": 0.15180355310440063, "learning_rate": 2e-07, "loss": 0.0248, "step": 877 }, { "clip_ratio/high_max": 0.001648773206397891, "clip_ratio/high_mean": 0.000675855781082646, "clip_ratio/low_mean": 0.0005633903510897653, "clip_ratio/low_min": 5.451296783576254e-05, "clip_ratio/region_mean": 0.0012392461685521994, "epoch": 0.08194646180051217, "grad_norm": 0.17971192300319672, "learning_rate": 2e-07, "loss": 0.0467, "step": 878 }, { "clip_ratio/high_max": 0.0021123115402588155, "clip_ratio/high_mean": 0.0007828916368453065, "clip_ratio/low_mean": 0.000799987308710115, "clip_ratio/low_min": 8.088581671472639e-05, "clip_ratio/region_mean": 0.0015828789619263262, "epoch": 0.08203979490051275, "grad_norm": 0.16863180696964264, "learning_rate": 2e-07, "loss": 0.0538, "step": 879 }, { "clip_ratio/high_max": 0.001910745704662986, "clip_ratio/high_mean": 0.0007282940996446996, "clip_ratio/low_mean": 0.0006974485531827668, "clip_ratio/low_min": 4.059912225784501e-05, "clip_ratio/region_mean": 0.0014257426701078657, "epoch": 0.08213312800051333, "grad_norm": 0.1560388058423996, "learning_rate": 2e-07, "loss": 0.0371, "step": 880 }, { "clip_ratio/high_max": 0.0017949828252312727, "clip_ratio/high_mean": 0.0007138938981370302, "clip_ratio/low_mean": 0.0007347150403802516, "clip_ratio/low_min": 2.5444866878387984e-05, "clip_ratio/region_mean": 0.0014486089821730275, "epoch": 0.08222646110051392, "grad_norm": 0.15449275076389313, "learning_rate": 2e-07, "loss": 0.0503, "step": 881 }, { "clip_ratio/high_max": 0.0018755537530523725, "clip_ratio/high_mean": 0.0007707539934926899, "clip_ratio/low_mean": 0.0007557611788797658, "clip_ratio/low_min": 9.487800525675993e-05, "clip_ratio/region_mean": 0.0015265151596395299, "epoch": 0.0823197942005145, "grad_norm": 0.14759862422943115, "learning_rate": 2e-07, "loss": 0.0436, "step": 882 }, { "clip_ratio/high_max": 0.0016725193599995691, "clip_ratio/high_mean": 0.0006895127080497332, "clip_ratio/low_mean": 0.000665772467982606, "clip_ratio/low_min": 1.0169215784117114e-05, "clip_ratio/region_mean": 0.0013552851814893074, "epoch": 0.08241312730051509, "grad_norm": 0.14946848154067993, "learning_rate": 2e-07, "loss": 0.0167, "step": 883 }, { "clip_ratio/high_max": 0.0018691809018491767, "clip_ratio/high_mean": 0.0007826018154446501, "clip_ratio/low_mean": 0.0006689381716569187, "clip_ratio/low_min": 2.4245207896456122e-05, "clip_ratio/region_mean": 0.0014515399852825794, "epoch": 0.08250646040051567, "grad_norm": 0.15613532066345215, "learning_rate": 2e-07, "loss": 0.0197, "step": 884 }, { "clip_ratio/high_max": 0.0018033101987384725, "clip_ratio/high_mean": 0.00068618035675172, "clip_ratio/low_mean": 0.0006260787340579554, "clip_ratio/low_min": 3.101827678619884e-05, "clip_ratio/region_mean": 0.0013122591008141171, "epoch": 0.08259979350051624, "grad_norm": 0.15417198836803436, "learning_rate": 2e-07, "loss": 0.0415, "step": 885 }, { "clip_ratio/high_max": 0.0014340401685331017, "clip_ratio/high_mean": 0.0005341353835319751, "clip_ratio/low_mean": 0.0007240354243549518, "clip_ratio/low_min": 5.067976417194586e-05, "clip_ratio/region_mean": 0.001258170828805305, "epoch": 0.08269312660051684, "grad_norm": 0.15052969753742218, "learning_rate": 2e-07, "loss": 0.1043, "step": 886 }, { "clip_ratio/high_max": 0.0015787077973072883, "clip_ratio/high_mean": 0.0007242591291287681, "clip_ratio/low_mean": 0.0006903689554746961, "clip_ratio/low_min": 3.094790872637532e-05, "clip_ratio/region_mean": 0.0014146280809654854, "epoch": 0.08278645970051741, "grad_norm": 0.15324755012989044, "learning_rate": 2e-07, "loss": 0.0139, "step": 887 }, { "clip_ratio/high_max": 0.0018118570296792313, "clip_ratio/high_mean": 0.0007107348083081888, "clip_ratio/low_mean": 0.0006355828336381819, "clip_ratio/low_min": 3.053248656215146e-05, "clip_ratio/region_mean": 0.0013463176510413177, "epoch": 0.08287979280051799, "grad_norm": 0.15231993794441223, "learning_rate": 2e-07, "loss": 0.0326, "step": 888 }, { "clip_ratio/high_max": 0.001823573427827796, "clip_ratio/high_mean": 0.0007488795381505042, "clip_ratio/low_mean": 0.0007158957960200496, "clip_ratio/low_min": 3.773344087676378e-05, "clip_ratio/region_mean": 0.0014647753232566174, "epoch": 0.08297312590051859, "grad_norm": 0.15197741985321045, "learning_rate": 2e-07, "loss": 0.026, "step": 889 }, { "clip_ratio/high_max": 0.0021500605798792094, "clip_ratio/high_mean": 0.0008346428749064216, "clip_ratio/low_mean": 0.0006667869165539742, "clip_ratio/low_min": 6.639120147156063e-05, "clip_ratio/region_mean": 0.0015014297896414064, "epoch": 0.08306645900051916, "grad_norm": 0.16495245695114136, "learning_rate": 2e-07, "loss": 0.0614, "step": 890 }, { "clip_ratio/high_max": 0.0018786937398544978, "clip_ratio/high_mean": 0.0007208866736618802, "clip_ratio/low_mean": 0.0006879121774545638, "clip_ratio/low_min": 2.4550824491598178e-05, "clip_ratio/region_mean": 0.0014087988383835182, "epoch": 0.08315979210051976, "grad_norm": 0.14222539961338043, "learning_rate": 2e-07, "loss": 0.0472, "step": 891 }, { "clip_ratio/high_max": 0.0018867180260713212, "clip_ratio/high_mean": 0.0007152818816393847, "clip_ratio/low_mean": 0.0006859085910946305, "clip_ratio/low_min": 2.0654330000979826e-05, "clip_ratio/region_mean": 0.0014011904459039215, "epoch": 0.08325312520052033, "grad_norm": 0.15165096521377563, "learning_rate": 2e-07, "loss": 0.0126, "step": 892 }, { "clip_ratio/high_max": 0.0018001270400418434, "clip_ratio/high_mean": 0.0007458786594725098, "clip_ratio/low_mean": 0.0007364626853814116, "clip_ratio/low_min": 1.8780048776534386e-05, "clip_ratio/region_mean": 0.0014823413148405962, "epoch": 0.08334645830052091, "grad_norm": 0.158020481467247, "learning_rate": 2e-07, "loss": 0.0347, "step": 893 }, { "clip_ratio/high_max": 0.0016442542146251071, "clip_ratio/high_mean": 0.0007394768999802181, "clip_ratio/low_mean": 0.0006795691297156736, "clip_ratio/low_min": 2.5206694772350602e-05, "clip_ratio/region_mean": 0.0014190459914971143, "epoch": 0.0834397914005215, "grad_norm": 0.22650913894176483, "learning_rate": 2e-07, "loss": 0.0074, "step": 894 }, { "clip_ratio/high_max": 0.0018098328328051139, "clip_ratio/high_mean": 0.0007236579522214015, "clip_ratio/low_mean": 0.000725617581338156, "clip_ratio/low_min": 2.7409633730712812e-05, "clip_ratio/region_mean": 0.0014492755435639992, "epoch": 0.08353312450052208, "grad_norm": 0.15240046381950378, "learning_rate": 2e-07, "loss": 0.029, "step": 895 }, { "clip_ratio/high_max": 0.0018638131441548467, "clip_ratio/high_mean": 0.0007621964232384926, "clip_ratio/low_mean": 0.0007263141533258022, "clip_ratio/low_min": 2.9648386771441437e-05, "clip_ratio/region_mean": 0.001488510566559853, "epoch": 0.08362645760052266, "grad_norm": 0.1562882363796234, "learning_rate": 2e-07, "loss": 0.028, "step": 896 }, { "clip_ratio/high_max": 0.0016560972198931267, "clip_ratio/high_mean": 0.0006526611077788402, "clip_ratio/low_mean": 0.00044857291049993364, "clip_ratio/low_min": 1.9857408915413544e-05, "clip_ratio/region_mean": 0.0011012340219167527, "completions/clipped_ratio": 0.0175258091517857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 693.3145751953125, "completions/mean_terminated_length": 632.6159057617188, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.08371979070052325, "grad_norm": 0.13171540200710297, "learning_rate": 2e-07, "loss": -0.0333, "num_tokens": 683181651.0, "reward": 0.5909947156906128, "reward_std": 0.18449048697948456, "rewards/simpleverify_reward/mean": 0.5909947156906128, "rewards/simpleverify_reward/std": 0.4916522800922394, "step": 897 }, { "clip_ratio/high_max": 0.0012316569664108101, "clip_ratio/high_mean": 0.00048433541087433696, "clip_ratio/low_mean": 0.000601938466388674, "clip_ratio/low_min": 5.258713645162061e-05, "clip_ratio/region_mean": 0.0010862738854484633, "epoch": 0.08381312380052383, "grad_norm": 0.11051229387521744, "learning_rate": 2e-07, "loss": 0.0634, "step": 898 }, { "clip_ratio/high_max": 0.001546220068121329, "clip_ratio/high_mean": 0.0005836970358359395, "clip_ratio/low_mean": 0.0006119621484685922, "clip_ratio/low_min": 1.1283625099167693e-05, "clip_ratio/region_mean": 0.001195659177028574, "epoch": 0.08390645690052441, "grad_norm": 0.1331675797700882, "learning_rate": 2e-07, "loss": 0.0292, "step": 899 }, { "clip_ratio/high_max": 0.0018290210373379523, "clip_ratio/high_mean": 0.0006926975593160023, "clip_ratio/low_mean": 0.000570325897570001, "clip_ratio/low_min": 1.014281042444054e-05, "clip_ratio/region_mean": 0.0012630234450625721, "epoch": 0.083999790000525, "grad_norm": 0.13456912338733673, "learning_rate": 2e-07, "loss": 0.0141, "step": 900 }, { "clip_ratio/high_max": 0.0016368049691664055, "clip_ratio/high_mean": 0.0005792823758383747, "clip_ratio/low_mean": 0.0005743690871895524, "clip_ratio/low_min": 6.18761255282152e-05, "clip_ratio/region_mean": 0.0011536514575709589, "epoch": 0.08409312310052558, "grad_norm": 0.13325537741184235, "learning_rate": 2e-07, "loss": 0.0562, "step": 901 }, { "clip_ratio/high_max": 0.0016441021907667164, "clip_ratio/high_mean": 0.000682180736475857, "clip_ratio/low_mean": 0.0005876615796296392, "clip_ratio/low_min": 6.639639286731835e-05, "clip_ratio/region_mean": 0.001269842279725708, "epoch": 0.08418645620052617, "grad_norm": 0.13410881161689758, "learning_rate": 2e-07, "loss": 0.036, "step": 902 }, { "clip_ratio/high_max": 0.0014794917096878635, "clip_ratio/high_mean": 0.0006082955871988815, "clip_ratio/low_mean": 0.0005183369721635245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011266325745964423, "epoch": 0.08427978930052675, "grad_norm": 0.17643125355243683, "learning_rate": 2e-07, "loss": 0.0128, "step": 903 }, { "clip_ratio/high_max": 0.0018354681833443465, "clip_ratio/high_mean": 0.0006911804730407312, "clip_ratio/low_mean": 0.0006056933525542263, "clip_ratio/low_min": 3.297692819614895e-05, "clip_ratio/region_mean": 0.0012968737974006217, "epoch": 0.08437312240052733, "grad_norm": 0.14177632331848145, "learning_rate": 2e-07, "loss": 0.0076, "step": 904 }, { "clip_ratio/high_max": 0.0015734645057818852, "clip_ratio/high_mean": 0.0006471936403613654, "clip_ratio/low_mean": 0.00044559212528838543, "clip_ratio/low_min": 2.3994695766305085e-05, "clip_ratio/region_mean": 0.0010927857583737932, "epoch": 0.08446645550052792, "grad_norm": 0.1294097900390625, "learning_rate": 2e-07, "loss": 0.0556, "step": 905 }, { "clip_ratio/high_max": 0.0016185065942408983, "clip_ratio/high_mean": 0.0006577680896953098, "clip_ratio/low_mean": 0.0005718813936255174, "clip_ratio/low_min": 1.945429903571494e-05, "clip_ratio/region_mean": 0.0012296495042392053, "epoch": 0.0845597886005285, "grad_norm": 0.12381117045879364, "learning_rate": 2e-07, "loss": 0.0312, "step": 906 }, { "clip_ratio/high_max": 0.001384539053105982, "clip_ratio/high_mean": 0.0006249527596082771, "clip_ratio/low_mean": 0.000523399712619721, "clip_ratio/low_min": 1.9171779058524407e-05, "clip_ratio/region_mean": 0.001148352494055871, "epoch": 0.08465312170052908, "grad_norm": 0.1327299028635025, "learning_rate": 2e-07, "loss": 0.0317, "step": 907 }, { "clip_ratio/high_max": 0.0016575021654716693, "clip_ratio/high_mean": 0.0007232843272504397, "clip_ratio/low_mean": 0.0005636911209876416, "clip_ratio/low_min": 3.611651118262671e-05, "clip_ratio/region_mean": 0.0012869754500570707, "epoch": 0.08474645480052967, "grad_norm": 0.13217133283615112, "learning_rate": 2e-07, "loss": 0.0362, "step": 908 }, { "clip_ratio/high_max": 0.001459163748222636, "clip_ratio/high_mean": 0.000605471712333383, "clip_ratio/low_mean": 0.0005223097605266958, "clip_ratio/low_min": 1.0770290828077123e-05, "clip_ratio/region_mean": 0.001127781502873404, "epoch": 0.08483978790053025, "grad_norm": 0.12879282236099243, "learning_rate": 2e-07, "loss": 0.0025, "step": 909 }, { "clip_ratio/high_max": 0.0016396696446463466, "clip_ratio/high_mean": 0.000699904059729306, "clip_ratio/low_mean": 0.0005736393550250796, "clip_ratio/low_min": 5.5327671361737885e-05, "clip_ratio/region_mean": 0.0012735434429487213, "epoch": 0.08493312100053083, "grad_norm": 0.13327962160110474, "learning_rate": 2e-07, "loss": 0.0159, "step": 910 }, { "clip_ratio/high_max": 0.001338091547950171, "clip_ratio/high_mean": 0.0006124109331722138, "clip_ratio/low_mean": 0.0005062381969764829, "clip_ratio/low_min": 3.639639999164501e-05, "clip_ratio/region_mean": 0.001118649135605665, "epoch": 0.08502645410053142, "grad_norm": 0.12741810083389282, "learning_rate": 2e-07, "loss": 0.0399, "step": 911 }, { "clip_ratio/high_max": 0.0016357092608814128, "clip_ratio/high_mean": 0.0006429650420614053, "clip_ratio/low_mean": 0.0006365568369801622, "clip_ratio/low_min": 1.9909477941837395e-05, "clip_ratio/region_mean": 0.0012795218062819913, "epoch": 0.085119787200532, "grad_norm": 0.13313420116901398, "learning_rate": 2e-07, "loss": 0.0496, "step": 912 }, { "clip_ratio/high_max": 0.0013013774641876807, "clip_ratio/high_mean": 0.0005311715563038888, "clip_ratio/low_mean": 0.0005567028892983217, "clip_ratio/low_min": 2.4046942598943133e-05, "clip_ratio/region_mean": 0.0010878744542424101, "epoch": 0.08521312030053259, "grad_norm": 0.12633016705513, "learning_rate": 2e-07, "loss": 0.0547, "step": 913 }, { "clip_ratio/high_max": 0.0016045389311329927, "clip_ratio/high_mean": 0.0006457148965637316, "clip_ratio/low_mean": 0.0005750907694164198, "clip_ratio/low_min": 1.975659870367963e-05, "clip_ratio/region_mean": 0.0012208056650706567, "epoch": 0.08530645340053317, "grad_norm": 0.14885802567005157, "learning_rate": 2e-07, "loss": 0.0405, "step": 914 }, { "clip_ratio/high_max": 0.0015958873882482294, "clip_ratio/high_mean": 0.0006219446295290254, "clip_ratio/low_mean": 0.0006039345562385279, "clip_ratio/low_min": 1.7715419744490646e-05, "clip_ratio/region_mean": 0.0012258791757631116, "epoch": 0.08539978650053374, "grad_norm": 0.14801421761512756, "learning_rate": 2e-07, "loss": 0.0227, "step": 915 }, { "clip_ratio/high_max": 0.0017836761944636237, "clip_ratio/high_mean": 0.0006767797167412937, "clip_ratio/low_mean": 0.0005639445716951741, "clip_ratio/low_min": 3.646466348072863e-05, "clip_ratio/region_mean": 0.0012407242975314148, "epoch": 0.08549311960053434, "grad_norm": 0.1382058560848236, "learning_rate": 2e-07, "loss": 0.035, "step": 916 }, { "clip_ratio/high_max": 0.0019303390145068988, "clip_ratio/high_mean": 0.0006446091765610618, "clip_ratio/low_mean": 0.0005069432818345376, "clip_ratio/low_min": 1.9383164726605173e-05, "clip_ratio/region_mean": 0.0011515524638525676, "epoch": 0.08558645270053492, "grad_norm": 0.12561194598674774, "learning_rate": 2e-07, "loss": 0.0076, "step": 917 }, { "clip_ratio/high_max": 0.0016396929386246484, "clip_ratio/high_mean": 0.0006356117628456559, "clip_ratio/low_mean": 0.0005789197766716825, "clip_ratio/low_min": 1.4504525097436272e-05, "clip_ratio/region_mean": 0.0012145315668021794, "epoch": 0.0856797858005355, "grad_norm": 0.12058184295892715, "learning_rate": 2e-07, "loss": 0.0243, "step": 918 }, { "clip_ratio/high_max": 0.0014865816010569688, "clip_ratio/high_mean": 0.0006612684783249279, "clip_ratio/low_mean": 0.0005225264426371723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001183794907774427, "epoch": 0.08577311890053609, "grad_norm": 0.13732269406318665, "learning_rate": 2e-07, "loss": 0.0079, "step": 919 }, { "clip_ratio/high_max": 0.0014245239854062675, "clip_ratio/high_mean": 0.0006365222707245266, "clip_ratio/low_mean": 0.0006876149236632045, "clip_ratio/low_min": 4.5640611460839864e-05, "clip_ratio/region_mean": 0.0013241371998446994, "epoch": 0.08586645200053666, "grad_norm": 0.12787336111068726, "learning_rate": 2e-07, "loss": 0.0305, "step": 920 }, { "clip_ratio/high_max": 0.0017049480084097013, "clip_ratio/high_mean": 0.0006316717317531584, "clip_ratio/low_mean": 0.0005368405563785927, "clip_ratio/low_min": 3.286567243776517e-05, "clip_ratio/region_mean": 0.00116851229176973, "epoch": 0.08595978510053724, "grad_norm": 0.12538205087184906, "learning_rate": 2e-07, "loss": 0.0451, "step": 921 }, { "clip_ratio/high_max": 0.0017987938990700059, "clip_ratio/high_mean": 0.000727051061403472, "clip_ratio/low_mean": 0.0004899194418612751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012169704787083901, "epoch": 0.08605311820053783, "grad_norm": 0.13710756599903107, "learning_rate": 2e-07, "loss": 0.0062, "step": 922 }, { "clip_ratio/high_max": 0.0016849308740347624, "clip_ratio/high_mean": 0.0006324390815279912, "clip_ratio/low_mean": 0.000549695181689458, "clip_ratio/low_min": 2.8599823963304516e-05, "clip_ratio/region_mean": 0.0011821342886833008, "epoch": 0.08614645130053841, "grad_norm": 0.1363169103860855, "learning_rate": 2e-07, "loss": 0.0498, "step": 923 }, { "clip_ratio/high_max": 0.0013095399353915127, "clip_ratio/high_mean": 0.000587751136663428, "clip_ratio/low_mean": 0.0004892511037724034, "clip_ratio/low_min": 3.272365074735717e-05, "clip_ratio/region_mean": 0.0010770022236101795, "epoch": 0.086239784400539, "grad_norm": 0.13535773754119873, "learning_rate": 2e-07, "loss": 0.0496, "step": 924 }, { "clip_ratio/high_max": 0.0017012190692184959, "clip_ratio/high_mean": 0.0006338730900097289, "clip_ratio/low_mean": 0.0006506138779514004, "clip_ratio/low_min": 6.710718434987939e-05, "clip_ratio/region_mean": 0.0012844869925174862, "epoch": 0.08633311750053958, "grad_norm": 0.14865043759346008, "learning_rate": 2e-07, "loss": 0.0538, "step": 925 }, { "clip_ratio/high_max": 0.001827272502850974, "clip_ratio/high_mean": 0.0007094328302628128, "clip_ratio/low_mean": 0.0006520816878037294, "clip_ratio/low_min": 8.030322533159051e-06, "clip_ratio/region_mean": 0.0013615145253424998, "epoch": 0.08642645060054016, "grad_norm": 0.14650647342205048, "learning_rate": 2e-07, "loss": 0.0351, "step": 926 }, { "clip_ratio/high_max": 0.002131748879037332, "clip_ratio/high_mean": 0.0007151886584324529, "clip_ratio/low_mean": 0.0006508305814350024, "clip_ratio/low_min": 4.6251254389062524e-05, "clip_ratio/region_mean": 0.0013660192526003812, "epoch": 0.08651978370054075, "grad_norm": 0.14973606169223785, "learning_rate": 2e-07, "loss": 0.0594, "step": 927 }, { "clip_ratio/high_max": 0.0015337518671003636, "clip_ratio/high_mean": 0.0006350528683469747, "clip_ratio/low_mean": 0.0005688353803634527, "clip_ratio/low_min": 7.010342687863158e-05, "clip_ratio/region_mean": 0.0012038882341585122, "epoch": 0.08661311680054133, "grad_norm": 0.1481391042470932, "learning_rate": 2e-07, "loss": 0.0297, "step": 928 }, { "clip_ratio/high_max": 0.0013615010975627229, "clip_ratio/high_mean": 0.0005450303506222554, "clip_ratio/low_mean": 0.0006698200668324716, "clip_ratio/low_min": 0.0001279893012906541, "clip_ratio/region_mean": 0.0012148504392825998, "epoch": 0.08670644990054191, "grad_norm": 0.15031468868255615, "learning_rate": 2e-07, "loss": 0.1025, "step": 929 }, { "clip_ratio/high_max": 0.0017725435245665722, "clip_ratio/high_mean": 0.0007050289477774641, "clip_ratio/low_mean": 0.000619553843534959, "clip_ratio/low_min": 2.2806201286584837e-05, "clip_ratio/region_mean": 0.0013245827940409072, "epoch": 0.0867997830005425, "grad_norm": 0.1373024731874466, "learning_rate": 2e-07, "loss": 0.0409, "step": 930 }, { "clip_ratio/high_max": 0.0020070315003977157, "clip_ratio/high_mean": 0.0008343212612089701, "clip_ratio/low_mean": 0.0005558394204854267, "clip_ratio/low_min": 7.678482052142499e-05, "clip_ratio/region_mean": 0.0013901606907893438, "epoch": 0.08689311610054308, "grad_norm": 0.14910583198070526, "learning_rate": 2e-07, "loss": -0.0223, "step": 931 }, { "clip_ratio/high_max": 0.001787763940228615, "clip_ratio/high_mean": 0.0007375061795755755, "clip_ratio/low_mean": 0.0005720901608583517, "clip_ratio/low_min": 3.441156150074676e-05, "clip_ratio/region_mean": 0.0013095963113300968, "epoch": 0.08698644920054366, "grad_norm": 0.13903598487377167, "learning_rate": 2e-07, "loss": -0.0026, "step": 932 }, { "clip_ratio/high_max": 0.0017603152555238921, "clip_ratio/high_mean": 0.0006573434729943983, "clip_ratio/low_mean": 0.0005951918428763747, "clip_ratio/low_min": 4.925007124256808e-05, "clip_ratio/region_mean": 0.0012525353231467307, "epoch": 0.08707978230054425, "grad_norm": 0.1309700757265091, "learning_rate": 2e-07, "loss": 0.0323, "step": 933 }, { "clip_ratio/high_max": 0.0014851174819341395, "clip_ratio/high_mean": 0.0006932877495273715, "clip_ratio/low_mean": 0.0005705613793907105, "clip_ratio/low_min": 4.488106060307473e-05, "clip_ratio/region_mean": 0.001263849171664333, "epoch": 0.08717311540054483, "grad_norm": 0.13729554414749146, "learning_rate": 2e-07, "loss": 0.0158, "step": 934 }, { "clip_ratio/high_max": 0.0017485947500972543, "clip_ratio/high_mean": 0.0007261312421178445, "clip_ratio/low_mean": 0.0006434208994505752, "clip_ratio/low_min": 5.5449204410251696e-05, "clip_ratio/region_mean": 0.001369552184769418, "epoch": 0.08726644850054542, "grad_norm": 0.13647888600826263, "learning_rate": 2e-07, "loss": 0.0512, "step": 935 }, { "clip_ratio/high_max": 0.0014082616762607358, "clip_ratio/high_mean": 0.0005941977287875488, "clip_ratio/low_mean": 0.0006057078635421931, "clip_ratio/low_min": 4.207480469631264e-05, "clip_ratio/region_mean": 0.001199905622343067, "epoch": 0.087359781600546, "grad_norm": 0.12021670490503311, "learning_rate": 2e-07, "loss": 0.0202, "step": 936 }, { "clip_ratio/high_max": 0.0014971503624110483, "clip_ratio/high_mean": 0.0006213501619640738, "clip_ratio/low_mean": 0.0007087758913257858, "clip_ratio/low_min": 5.9460407101141755e-05, "clip_ratio/region_mean": 0.0013301260405569337, "epoch": 0.08745311470054658, "grad_norm": 0.1369185745716095, "learning_rate": 2e-07, "loss": 0.0434, "step": 937 }, { "clip_ratio/high_max": 0.00167594023514539, "clip_ratio/high_mean": 0.0006369892980728764, "clip_ratio/low_mean": 0.0005752286551796715, "clip_ratio/low_min": 1.2628814147319645e-05, "clip_ratio/region_mean": 0.001212217946886085, "epoch": 0.08754644780054717, "grad_norm": 0.1603764295578003, "learning_rate": 2e-07, "loss": 0.0219, "step": 938 }, { "clip_ratio/high_max": 0.00185001769932569, "clip_ratio/high_mean": 0.0007100316797732376, "clip_ratio/low_mean": 0.0005651447208947502, "clip_ratio/low_min": 8.640950909466483e-06, "clip_ratio/region_mean": 0.0012751764043059666, "epoch": 0.08763978090054775, "grad_norm": 0.1361202746629715, "learning_rate": 2e-07, "loss": -0.0021, "step": 939 }, { "clip_ratio/high_max": 0.0017439968542021234, "clip_ratio/high_mean": 0.0006408352073776769, "clip_ratio/low_mean": 0.0006287747082751594, "clip_ratio/low_min": 2.891475651267683e-05, "clip_ratio/region_mean": 0.0012696098783635534, "epoch": 0.08773311400054833, "grad_norm": 0.1413341909646988, "learning_rate": 2e-07, "loss": 0.0326, "step": 940 }, { "clip_ratio/high_max": 0.0017212278908118606, "clip_ratio/high_mean": 0.0006696018208458554, "clip_ratio/low_mean": 0.0005703463830286637, "clip_ratio/low_min": 2.417275118204998e-05, "clip_ratio/region_mean": 0.001239948207512498, "epoch": 0.08782644710054892, "grad_norm": 0.15333615243434906, "learning_rate": 2e-07, "loss": 0.0331, "step": 941 }, { "clip_ratio/high_max": 0.0017269724812649656, "clip_ratio/high_mean": 0.0006890201184432954, "clip_ratio/low_mean": 0.0006185145830386318, "clip_ratio/low_min": 2.2830275156593416e-05, "clip_ratio/region_mean": 0.001307534705119906, "epoch": 0.0879197802005495, "grad_norm": 0.12947024405002594, "learning_rate": 2e-07, "loss": 0.0166, "step": 942 }, { "clip_ratio/high_max": 0.001715404443530133, "clip_ratio/high_mean": 0.0006204548408277333, "clip_ratio/low_mean": 0.0005898995459574508, "clip_ratio/low_min": 2.2046743652026635e-05, "clip_ratio/region_mean": 0.0012103544031560887, "epoch": 0.08801311330055009, "grad_norm": 0.15068522095680237, "learning_rate": 2e-07, "loss": 0.0237, "step": 943 }, { "clip_ratio/high_max": 0.0016891044288058765, "clip_ratio/high_mean": 0.0007275959396793041, "clip_ratio/low_mean": 0.0005888412961212453, "clip_ratio/low_min": 9.14544943952933e-06, "clip_ratio/region_mean": 0.0013164372503524646, "epoch": 0.08810644640055067, "grad_norm": 0.1549796313047409, "learning_rate": 2e-07, "loss": 0.0392, "step": 944 }, { "clip_ratio/high_max": 0.0020497103323577903, "clip_ratio/high_mean": 0.0008110955805022968, "clip_ratio/low_mean": 0.0006500741073978133, "clip_ratio/low_min": 3.300243042758666e-05, "clip_ratio/region_mean": 0.0014611696969950572, "epoch": 0.08819977950055125, "grad_norm": 0.16115222871303558, "learning_rate": 2e-07, "loss": 0.0074, "step": 945 }, { "clip_ratio/high_max": 0.0019432917761150748, "clip_ratio/high_mean": 0.0007601940524182282, "clip_ratio/low_mean": 0.0005579055095950025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013180995592847466, "epoch": 0.08829311260055184, "grad_norm": 0.14384886622428894, "learning_rate": 2e-07, "loss": -0.0176, "step": 946 }, { "clip_ratio/high_max": 0.0019829376233246876, "clip_ratio/high_mean": 0.0007140566958696581, "clip_ratio/low_mean": 0.0005057172093074769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012197739088151138, "epoch": 0.08838644570055242, "grad_norm": 0.13894817233085632, "learning_rate": 2e-07, "loss": 0.0374, "step": 947 }, { "clip_ratio/high_max": 0.0015101680801308248, "clip_ratio/high_mean": 0.0006568424823854002, "clip_ratio/low_mean": 0.0006320193861029111, "clip_ratio/low_min": 8.233397693402367e-05, "clip_ratio/region_mean": 0.001288861869397806, "epoch": 0.088479778800553, "grad_norm": 0.14588098227977753, "learning_rate": 2e-07, "loss": 0.0289, "step": 948 }, { "clip_ratio/high_max": 0.0017957466625375673, "clip_ratio/high_mean": 0.0007309983657250996, "clip_ratio/low_mean": 0.0006404835730791092, "clip_ratio/low_min": 7.05012625985546e-05, "clip_ratio/region_mean": 0.0013714819397137035, "epoch": 0.08857311190055359, "grad_norm": 0.14395838975906372, "learning_rate": 2e-07, "loss": 0.0442, "step": 949 }, { "clip_ratio/high_max": 0.0015422342694364488, "clip_ratio/high_mean": 0.0006393678486347198, "clip_ratio/low_mean": 0.0005715184206565027, "clip_ratio/low_min": 1.9817986412817845e-05, "clip_ratio/region_mean": 0.0012108862792956643, "epoch": 0.08866644500055416, "grad_norm": 0.1354590803384781, "learning_rate": 2e-07, "loss": 0.0281, "step": 950 }, { "clip_ratio/high_max": 0.002122895653883461, "clip_ratio/high_mean": 0.0008152450664056232, "clip_ratio/low_mean": 0.0005847554530191701, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014000005394336767, "epoch": 0.08875977810055474, "grad_norm": 0.14133866131305695, "learning_rate": 2e-07, "loss": 0.0383, "step": 951 }, { "clip_ratio/high_max": 0.0018833830145013053, "clip_ratio/high_mean": 0.0008094449585769325, "clip_ratio/low_mean": 0.000754854501792579, "clip_ratio/low_min": 6.446466704801423e-05, "clip_ratio/region_mean": 0.0015642994767404161, "epoch": 0.08885311120055533, "grad_norm": 0.14470089972019196, "learning_rate": 2e-07, "loss": 0.0115, "step": 952 }, { "clip_ratio/high_max": 0.0018304592394997599, "clip_ratio/high_mean": 0.0007313287478609709, "clip_ratio/low_mean": 0.0006170689212012803, "clip_ratio/low_min": 1.1741499292838853e-05, "clip_ratio/region_mean": 0.0013483976508723572, "epoch": 0.08894644430055591, "grad_norm": 0.1376315802335739, "learning_rate": 2e-07, "loss": 0.0291, "step": 953 }, { "clip_ratio/high_max": 0.0016200674144783989, "clip_ratio/high_mean": 0.0006944479609956034, "clip_ratio/low_mean": 0.0006019007887516636, "clip_ratio/low_min": 1.0088780982187018e-05, "clip_ratio/region_mean": 0.0012963487206434365, "epoch": 0.0890397774005565, "grad_norm": 0.1357940286397934, "learning_rate": 2e-07, "loss": 0.005, "step": 954 }, { "clip_ratio/high_max": 0.0015606282177031972, "clip_ratio/high_mean": 0.0006005847390042618, "clip_ratio/low_mean": 0.0005312463963491609, "clip_ratio/low_min": 1.215480369864963e-05, "clip_ratio/region_mean": 0.0011318311371724121, "epoch": 0.08913311050055708, "grad_norm": 0.13029451668262482, "learning_rate": 2e-07, "loss": -0.0155, "step": 955 }, { "clip_ratio/high_max": 0.0017927694389072713, "clip_ratio/high_mean": 0.0007009340861259261, "clip_ratio/low_mean": 0.000638322975646588, "clip_ratio/low_min": 3.661770688268007e-05, "clip_ratio/region_mean": 0.0013392570508585777, "epoch": 0.08922644360055766, "grad_norm": 0.15635481476783752, "learning_rate": 2e-07, "loss": 0.0138, "step": 956 }, { "clip_ratio/high_max": 0.0020249964618415106, "clip_ratio/high_mean": 0.0007233247033582302, "clip_ratio/low_mean": 0.0006676106404484017, "clip_ratio/low_min": 2.1915166144026443e-05, "clip_ratio/region_mean": 0.0013909353365306742, "epoch": 0.08931977670055825, "grad_norm": 0.14826840162277222, "learning_rate": 2e-07, "loss": 0.0081, "step": 957 }, { "clip_ratio/high_max": 0.0015558527138637146, "clip_ratio/high_mean": 0.000683901034790324, "clip_ratio/low_mean": 0.0006652513166045537, "clip_ratio/low_min": 4.6724190724489745e-05, "clip_ratio/region_mean": 0.0013491523422999308, "epoch": 0.08941310980055883, "grad_norm": 0.144377201795578, "learning_rate": 2e-07, "loss": 0.0721, "step": 958 }, { "clip_ratio/high_max": 0.0017109764012275264, "clip_ratio/high_mean": 0.0006485602607426699, "clip_ratio/low_mean": 0.0007003342125244671, "clip_ratio/low_min": 3.6682793506770395e-05, "clip_ratio/region_mean": 0.0013488944678101689, "epoch": 0.08950644290055941, "grad_norm": 0.15789814293384552, "learning_rate": 2e-07, "loss": 0.0403, "step": 959 }, { "clip_ratio/high_max": 0.0018197315039287787, "clip_ratio/high_mean": 0.0007248766050906852, "clip_ratio/low_mean": 0.000606198409514036, "clip_ratio/low_min": 1.2729124136967584e-05, "clip_ratio/region_mean": 0.0013310750218806788, "epoch": 0.08959977600056, "grad_norm": 0.1431267261505127, "learning_rate": 2e-07, "loss": 0.0447, "step": 960 }, { "clip_ratio/high_max": 0.001526966687379172, "clip_ratio/high_mean": 0.0006514838023576885, "clip_ratio/low_mean": 0.0006190362310007913, "clip_ratio/low_min": 2.126883828168502e-05, "clip_ratio/region_mean": 0.0012705200242635328, "epoch": 0.08969310910056058, "grad_norm": 0.1427212804555893, "learning_rate": 2e-07, "loss": 0.0496, "step": 961 }, { "clip_ratio/high_max": 0.0015224709204630926, "clip_ratio/high_mean": 0.0006256362039493979, "clip_ratio/low_mean": 0.0006114253646956058, "clip_ratio/low_min": 3.632313018897548e-05, "clip_ratio/region_mean": 0.001237061576830456, "epoch": 0.08978644220056116, "grad_norm": 0.1447361409664154, "learning_rate": 2e-07, "loss": 0.0384, "step": 962 }, { "clip_ratio/high_max": 0.0017098629650718067, "clip_ratio/high_mean": 0.0006859605318823014, "clip_ratio/low_mean": 0.0005490450348588638, "clip_ratio/low_min": 2.386863070569234e-05, "clip_ratio/region_mean": 0.0012350055658316705, "epoch": 0.08987977530056175, "grad_norm": 1.3041605949401855, "learning_rate": 2e-07, "loss": 0.0336, "step": 963 }, { "clip_ratio/high_max": 0.0018612694220792037, "clip_ratio/high_mean": 0.0006058393337298185, "clip_ratio/low_mean": 0.0005430824721770477, "clip_ratio/low_min": 1.2744698324240744e-05, "clip_ratio/region_mean": 0.0011489218013593927, "epoch": 0.08997310840056233, "grad_norm": 0.13747842609882355, "learning_rate": 2e-07, "loss": 0.0623, "step": 964 }, { "clip_ratio/high_max": 0.0018122640140063595, "clip_ratio/high_mean": 0.0007432564925693441, "clip_ratio/low_mean": 0.0005991596044623293, "clip_ratio/low_min": 4.3590611312538385e-05, "clip_ratio/region_mean": 0.0013424160715658218, "epoch": 0.09006644150056292, "grad_norm": 0.1335994154214859, "learning_rate": 2e-07, "loss": 0.0045, "step": 965 }, { "clip_ratio/high_max": 0.001826896026614122, "clip_ratio/high_mean": 0.0007491540145565523, "clip_ratio/low_mean": 0.0006790126553823939, "clip_ratio/low_min": 2.9523570447054226e-05, "clip_ratio/region_mean": 0.0014281667063187342, "epoch": 0.0901597746005635, "grad_norm": 0.15497714281082153, "learning_rate": 2e-07, "loss": 0.0311, "step": 966 }, { "clip_ratio/high_max": 0.001737251728627598, "clip_ratio/high_mean": 0.0006904335887156776, "clip_ratio/low_mean": 0.0005882085733901476, "clip_ratio/low_min": 8.813500153337372e-05, "clip_ratio/region_mean": 0.0012786421793862246, "epoch": 0.09025310770056408, "grad_norm": 0.1342792510986328, "learning_rate": 2e-07, "loss": 0.0671, "step": 967 }, { "clip_ratio/high_max": 0.0018149910429201555, "clip_ratio/high_mean": 0.0007157150294005987, "clip_ratio/low_mean": 0.0005523388044821331, "clip_ratio/low_min": 4.987630745745264e-06, "clip_ratio/region_mean": 0.001268053809326375, "epoch": 0.09034644080056467, "grad_norm": 0.1338386982679367, "learning_rate": 2e-07, "loss": 0.0237, "step": 968 }, { "clip_ratio/high_max": 0.0017770997328625526, "clip_ratio/high_mean": 0.0007023067373665981, "clip_ratio/low_mean": 0.0005684520961040107, "clip_ratio/low_min": 1.171289386547869e-05, "clip_ratio/region_mean": 0.0012707587775366846, "epoch": 0.09043977390056525, "grad_norm": 0.14405693113803864, "learning_rate": 2e-07, "loss": 0.0249, "step": 969 }, { "clip_ratio/high_max": 0.001679560162301641, "clip_ratio/high_mean": 0.0006905743957759114, "clip_ratio/low_mean": 0.0005830924365000101, "clip_ratio/low_min": 3.5374699109524954e-05, "clip_ratio/region_mean": 0.0012736668359139003, "epoch": 0.09053310700056583, "grad_norm": 0.8259018063545227, "learning_rate": 2e-07, "loss": 0.025, "step": 970 }, { "clip_ratio/high_max": 0.0017475907079642639, "clip_ratio/high_mean": 0.0007005008719715988, "clip_ratio/low_mean": 0.0005397357708716299, "clip_ratio/low_min": 2.332093754375819e-05, "clip_ratio/region_mean": 0.0012402366410242394, "epoch": 0.09062644010056642, "grad_norm": 0.14208810031414032, "learning_rate": 2e-07, "loss": 0.0035, "step": 971 }, { "clip_ratio/high_max": 0.0016138025239342824, "clip_ratio/high_mean": 0.0006210274350451073, "clip_ratio/low_mean": 0.0007686681119594141, "clip_ratio/low_min": 7.396710861939937e-05, "clip_ratio/region_mean": 0.0013896955097152386, "epoch": 0.090719773200567, "grad_norm": 0.15578195452690125, "learning_rate": 2e-07, "loss": 0.0477, "step": 972 }, { "clip_ratio/high_max": 0.0016197395088966005, "clip_ratio/high_mean": 0.0006766420756321168, "clip_ratio/low_mean": 0.0005795812576252501, "clip_ratio/low_min": 2.3780301489750855e-05, "clip_ratio/region_mean": 0.0012562233459902927, "epoch": 0.09081310630056758, "grad_norm": 0.14359670877456665, "learning_rate": 2e-07, "loss": 0.0479, "step": 973 }, { "clip_ratio/high_max": 0.002236423817521427, "clip_ratio/high_mean": 0.0007784818935760995, "clip_ratio/low_mean": 0.00065988097776426, "clip_ratio/low_min": 4.9225084694626275e-05, "clip_ratio/region_mean": 0.0014383628949872218, "epoch": 0.09090643940056817, "grad_norm": 0.1559831202030182, "learning_rate": 2e-07, "loss": 0.0336, "step": 974 }, { "clip_ratio/high_max": 0.0017459721348132007, "clip_ratio/high_mean": 0.0007116125016182195, "clip_ratio/low_mean": 0.0005034045825595967, "clip_ratio/low_min": 1.4509576431009918e-05, "clip_ratio/region_mean": 0.0012150170550739858, "epoch": 0.09099977250056875, "grad_norm": 0.14853475987911224, "learning_rate": 2e-07, "loss": -0.0235, "step": 975 }, { "clip_ratio/high_max": 0.0019015687321370933, "clip_ratio/high_mean": 0.000772454404796008, "clip_ratio/low_mean": 0.0006130928877610131, "clip_ratio/low_min": 6.95139578965609e-06, "clip_ratio/region_mean": 0.0013855473043804523, "epoch": 0.09109310560056934, "grad_norm": 0.15120293200016022, "learning_rate": 2e-07, "loss": 0.0229, "step": 976 }, { "clip_ratio/high_max": 0.0015068525608512573, "clip_ratio/high_mean": 0.0006119116042100359, "clip_ratio/low_mean": 0.0006469875588663854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012588991630764212, "epoch": 0.09118643870056992, "grad_norm": 0.14164192974567413, "learning_rate": 2e-07, "loss": 0.0475, "step": 977 }, { "clip_ratio/high_max": 0.0017150776475318708, "clip_ratio/high_mean": 0.0007344686237047426, "clip_ratio/low_mean": 0.0006252166194826714, "clip_ratio/low_min": 8.17527779872762e-06, "clip_ratio/region_mean": 0.0013596852477348875, "epoch": 0.0912797718005705, "grad_norm": 0.143987238407135, "learning_rate": 2e-07, "loss": 0.0031, "step": 978 }, { "clip_ratio/high_max": 0.0016577611313550733, "clip_ratio/high_mean": 0.0007134181360015646, "clip_ratio/low_mean": 0.0005907669346925104, "clip_ratio/low_min": 5.184766268939711e-05, "clip_ratio/region_mean": 0.0013041850907029584, "epoch": 0.09137310490057109, "grad_norm": 0.14742669463157654, "learning_rate": 2e-07, "loss": 0.0162, "step": 979 }, { "clip_ratio/high_max": 0.0019714172631211113, "clip_ratio/high_mean": 0.0007283400027517928, "clip_ratio/low_mean": 0.0006567300588358194, "clip_ratio/low_min": 4.029608408018248e-05, "clip_ratio/region_mean": 0.0013850700524926651, "epoch": 0.09146643800057166, "grad_norm": 0.16006894409656525, "learning_rate": 2e-07, "loss": 0.0347, "step": 980 }, { "clip_ratio/high_max": 0.00166111431644822, "clip_ratio/high_mean": 0.0007188373247117852, "clip_ratio/low_mean": 0.0006070865529181901, "clip_ratio/low_min": 2.476297777320724e-05, "clip_ratio/region_mean": 0.0013259238803584594, "epoch": 0.09155977110057224, "grad_norm": 0.144860178232193, "learning_rate": 2e-07, "loss": 0.0599, "step": 981 }, { "clip_ratio/high_max": 0.0017787624110496836, "clip_ratio/high_mean": 0.0006380644599630614, "clip_ratio/low_mean": 0.0005485832753038267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011866476852446795, "epoch": 0.09165310420057284, "grad_norm": 0.13645587861537933, "learning_rate": 2e-07, "loss": -0.0041, "step": 982 }, { "clip_ratio/high_max": 0.0016415845420851838, "clip_ratio/high_mean": 0.0007056201520754257, "clip_ratio/low_mean": 0.0006400940783350961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001345714208582649, "epoch": 0.09174643730057341, "grad_norm": 0.14309969544410706, "learning_rate": 2e-07, "loss": -0.0419, "step": 983 }, { "clip_ratio/high_max": 0.0018460395440342836, "clip_ratio/high_mean": 0.0007390852915705182, "clip_ratio/low_mean": 0.0006256327988012345, "clip_ratio/low_min": 9.77937725110678e-06, "clip_ratio/region_mean": 0.001364718107652152, "epoch": 0.09183977040057399, "grad_norm": 0.16194909811019897, "learning_rate": 2e-07, "loss": 0.0488, "step": 984 }, { "clip_ratio/high_max": 0.0016744554959586821, "clip_ratio/high_mean": 0.000622940982793807, "clip_ratio/low_mean": 0.0005605103960988345, "clip_ratio/low_min": 2.8809553441533353e-05, "clip_ratio/region_mean": 0.0011834513570647687, "epoch": 0.09193310350057458, "grad_norm": 0.13443738222122192, "learning_rate": 2e-07, "loss": 0.0267, "step": 985 }, { "clip_ratio/high_max": 0.0014915238753019366, "clip_ratio/high_mean": 0.0005742997709603515, "clip_ratio/low_mean": 0.0005802489740744932, "clip_ratio/low_min": 2.3241779217642033e-05, "clip_ratio/region_mean": 0.00115454872866394, "epoch": 0.09202643660057516, "grad_norm": 0.13556648790836334, "learning_rate": 2e-07, "loss": 0.0538, "step": 986 }, { "clip_ratio/high_max": 0.0017074177194444928, "clip_ratio/high_mean": 0.0007071668587741442, "clip_ratio/low_mean": 0.0006908290124556515, "clip_ratio/low_min": 7.401355560432421e-05, "clip_ratio/region_mean": 0.001397995845763944, "epoch": 0.09211976970057575, "grad_norm": 0.22078122198581696, "learning_rate": 2e-07, "loss": 0.0436, "step": 987 }, { "clip_ratio/high_max": 0.0018324677294003777, "clip_ratio/high_mean": 0.0007154002159950323, "clip_ratio/low_mean": 0.0006683971587335691, "clip_ratio/low_min": 4.0759900002740324e-05, "clip_ratio/region_mean": 0.0013837973892805167, "epoch": 0.09221310280057633, "grad_norm": 0.1451905369758606, "learning_rate": 2e-07, "loss": 0.0278, "step": 988 }, { "clip_ratio/high_max": 0.0020742138804052956, "clip_ratio/high_mean": 0.0008382299147342565, "clip_ratio/low_mean": 0.0006021280514687533, "clip_ratio/low_min": 3.286960145487683e-05, "clip_ratio/region_mean": 0.0014403579698409885, "epoch": 0.09230643590057691, "grad_norm": 0.14585813879966736, "learning_rate": 2e-07, "loss": -0.0088, "step": 989 }, { "clip_ratio/high_max": 0.0016401061075157486, "clip_ratio/high_mean": 0.0006616678638238227, "clip_ratio/low_mean": 0.0007205850324680796, "clip_ratio/low_min": 3.407484200579347e-05, "clip_ratio/region_mean": 0.0013822528962919023, "epoch": 0.0923997690005775, "grad_norm": 0.21004173159599304, "learning_rate": 2e-07, "loss": 0.0501, "step": 990 }, { "clip_ratio/high_max": 0.001992966113903094, "clip_ratio/high_mean": 0.0007904664817033336, "clip_ratio/low_mean": 0.0006689060101052746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014593724445148837, "epoch": 0.09249310210057808, "grad_norm": 0.17767773568630219, "learning_rate": 2e-07, "loss": 0.0468, "step": 991 }, { "clip_ratio/high_max": 0.0017025673387252027, "clip_ratio/high_mean": 0.0006677370520264958, "clip_ratio/low_mean": 0.000675117062201025, "clip_ratio/low_min": 4.993695711164037e-05, "clip_ratio/region_mean": 0.0013428540805762168, "epoch": 0.09258643520057866, "grad_norm": 0.16823220252990723, "learning_rate": 2e-07, "loss": 0.0569, "step": 992 }, { "clip_ratio/high_max": 0.001995711438212311, "clip_ratio/high_mean": 0.0007550971040473087, "clip_ratio/low_mean": 0.0007045860766083933, "clip_ratio/low_min": 3.3524544051033445e-05, "clip_ratio/region_mean": 0.0014596832006645855, "epoch": 0.09267976830057925, "grad_norm": 0.6557042002677917, "learning_rate": 2e-07, "loss": 0.0396, "step": 993 }, { "clip_ratio/high_max": 0.0015517973006353714, "clip_ratio/high_mean": 0.0006013079510012176, "clip_ratio/low_mean": 0.0007357747108471813, "clip_ratio/low_min": 4.014886644654325e-05, "clip_ratio/region_mean": 0.0013370826782193035, "epoch": 0.09277310140057983, "grad_norm": 0.15774820744991302, "learning_rate": 2e-07, "loss": 0.0793, "step": 994 }, { "clip_ratio/high_max": 0.00193346312516951, "clip_ratio/high_mean": 0.0006697505014017224, "clip_ratio/low_mean": 0.0006344943794829305, "clip_ratio/low_min": 3.7387299016700126e-05, "clip_ratio/region_mean": 0.001304244831771939, "epoch": 0.09286643450058042, "grad_norm": 0.15775588154792786, "learning_rate": 2e-07, "loss": 0.0672, "step": 995 }, { "clip_ratio/high_max": 0.0015834565965633374, "clip_ratio/high_mean": 0.0006258201337914215, "clip_ratio/low_mean": 0.0007221523264888674, "clip_ratio/low_min": 0.00011272557094343938, "clip_ratio/region_mean": 0.0013479724584612995, "epoch": 0.092959767600581, "grad_norm": 0.18795281648635864, "learning_rate": 2e-07, "loss": 0.1025, "step": 996 }, { "clip_ratio/high_max": 0.0020258818367437925, "clip_ratio/high_mean": 0.0007455363183908048, "clip_ratio/low_mean": 0.0005791101830254775, "clip_ratio/low_min": 6.646107976848725e-06, "clip_ratio/region_mean": 0.001324646465945989, "epoch": 0.09305310070058158, "grad_norm": 0.17034779489040375, "learning_rate": 2e-07, "loss": 0.0394, "step": 997 }, { "clip_ratio/high_max": 0.0017022054889821447, "clip_ratio/high_mean": 0.0006251930335565703, "clip_ratio/low_mean": 0.0007103255993570201, "clip_ratio/low_min": 1.2003072697552852e-05, "clip_ratio/region_mean": 0.0013355186201806646, "epoch": 0.09314643380058217, "grad_norm": 0.1744387447834015, "learning_rate": 2e-07, "loss": 0.0541, "step": 998 }, { "clip_ratio/high_max": 0.0017306126392213628, "clip_ratio/high_mean": 0.0008039728636504151, "clip_ratio/low_mean": 0.00067266517180542, "clip_ratio/low_min": 7.050772228467395e-05, "clip_ratio/region_mean": 0.0014766380190849304, "epoch": 0.09323976690058275, "grad_norm": 0.16662435233592987, "learning_rate": 2e-07, "loss": 0.0177, "step": 999 }, { "clip_ratio/high_max": 0.0015837332321098074, "clip_ratio/high_mean": 0.0006577183075933135, "clip_ratio/low_mean": 0.0005895754638913786, "clip_ratio/low_min": 2.6663136850402225e-05, "clip_ratio/region_mean": 0.001247293796041049, "epoch": 0.09333310000058333, "grad_norm": 0.16519631445407867, "learning_rate": 2e-07, "loss": 0.0312, "step": 1000 }, { "clip_ratio/high_max": 0.0018397188614471816, "clip_ratio/high_mean": 0.0006786323519918369, "clip_ratio/low_mean": 0.0007147982960304944, "clip_ratio/low_min": 3.5099701563012786e-05, "clip_ratio/region_mean": 0.0013934306480223313, "epoch": 0.09342643310058392, "grad_norm": 0.15442317724227905, "learning_rate": 2e-07, "loss": 0.0294, "step": 1001 }, { "clip_ratio/high_max": 0.0016074186460173223, "clip_ratio/high_mean": 0.0007432934926328016, "clip_ratio/low_mean": 0.0006852986152807716, "clip_ratio/low_min": 9.945174133463297e-05, "clip_ratio/region_mean": 0.0014285920697147958, "epoch": 0.0935197662005845, "grad_norm": 0.16437748074531555, "learning_rate": 2e-07, "loss": 0.027, "step": 1002 }, { "clip_ratio/high_max": 0.0017652895166975213, "clip_ratio/high_mean": 0.0007256365670400555, "clip_ratio/low_mean": 0.0006940888197277673, "clip_ratio/low_min": 3.884432226186618e-05, "clip_ratio/region_mean": 0.0014197253804013599, "epoch": 0.09361309930058508, "grad_norm": 0.15147742629051208, "learning_rate": 2e-07, "loss": 0.0209, "step": 1003 }, { "clip_ratio/high_max": 0.0015656403338653035, "clip_ratio/high_mean": 0.0006210986393853091, "clip_ratio/low_mean": 0.0006704529841954354, "clip_ratio/low_min": 1.7858314095064998e-05, "clip_ratio/region_mean": 0.0012915516417706385, "epoch": 0.09370643240058567, "grad_norm": 0.1535167396068573, "learning_rate": 2e-07, "loss": 0.0281, "step": 1004 }, { "clip_ratio/high_max": 0.0017470990351284854, "clip_ratio/high_mean": 0.0007216647591121728, "clip_ratio/low_mean": 0.0006341245734802214, "clip_ratio/low_min": 2.708146303120884e-05, "clip_ratio/region_mean": 0.0013557892962126061, "epoch": 0.09379976550058625, "grad_norm": 0.16980122029781342, "learning_rate": 2e-07, "loss": 0.0224, "step": 1005 }, { "clip_ratio/high_max": 0.0014837493290542625, "clip_ratio/high_mean": 0.0006286433053901419, "clip_ratio/low_mean": 0.0006663286676484859, "clip_ratio/low_min": 3.884534271492157e-05, "clip_ratio/region_mean": 0.0012949719566677231, "epoch": 0.09389309860058684, "grad_norm": 0.15000736713409424, "learning_rate": 2e-07, "loss": 0.0008, "step": 1006 }, { "clip_ratio/high_max": 0.00159943537437357, "clip_ratio/high_mean": 0.000727240692867781, "clip_ratio/low_mean": 0.0005816931261506397, "clip_ratio/low_min": 4.880149026575964e-05, "clip_ratio/region_mean": 0.0013089338499412406, "epoch": 0.09398643170058742, "grad_norm": 0.2467365562915802, "learning_rate": 2e-07, "loss": 0.0026, "step": 1007 }, { "clip_ratio/high_max": 0.001948192177223973, "clip_ratio/high_mean": 0.0007603163394378498, "clip_ratio/low_mean": 0.0007879956629039953, "clip_ratio/low_min": 5.997062362439465e-05, "clip_ratio/region_mean": 0.001548312015074771, "epoch": 0.094079764800588, "grad_norm": 0.19664718210697174, "learning_rate": 2e-07, "loss": 0.0386, "step": 1008 }, { "clip_ratio/high_max": 0.0014701535037602298, "clip_ratio/high_mean": 0.0006556087682838552, "clip_ratio/low_mean": 0.0007301270270545501, "clip_ratio/low_min": 2.053219395747874e-05, "clip_ratio/region_mean": 0.0013857358280802146, "epoch": 0.09417309790058859, "grad_norm": 0.16688448190689087, "learning_rate": 2e-07, "loss": 0.0373, "step": 1009 }, { "clip_ratio/high_max": 0.001752082313032588, "clip_ratio/high_mean": 0.0008138333596434677, "clip_ratio/low_mean": 0.0006142715483292704, "clip_ratio/low_min": 4.132232425035909e-05, "clip_ratio/region_mean": 0.0014281049407145474, "epoch": 0.09426643100058917, "grad_norm": 0.1566309928894043, "learning_rate": 2e-07, "loss": -0.0074, "step": 1010 }, { "clip_ratio/high_max": 0.0021022875807830133, "clip_ratio/high_mean": 0.0008798499784461455, "clip_ratio/low_mean": 0.0006395479522325331, "clip_ratio/low_min": 3.419388758629793e-05, "clip_ratio/region_mean": 0.0015193979088508058, "epoch": 0.09435976410058974, "grad_norm": 15.408679008483887, "learning_rate": 2e-07, "loss": -0.0012, "step": 1011 }, { "clip_ratio/high_max": 0.0022488931062980555, "clip_ratio/high_mean": 0.0008231203883042326, "clip_ratio/low_mean": 0.0007045458714856068, "clip_ratio/low_min": 6.297800791799091e-05, "clip_ratio/region_mean": 0.0015276662452379242, "epoch": 0.09445309720059034, "grad_norm": 0.17172129452228546, "learning_rate": 2e-07, "loss": 0.0516, "step": 1012 }, { "clip_ratio/high_max": 0.001885849102109205, "clip_ratio/high_mean": 0.0007454884434991982, "clip_ratio/low_mean": 0.0007858692370064091, "clip_ratio/low_min": 8.737896678212564e-05, "clip_ratio/region_mean": 0.0015313577387132682, "epoch": 0.09454643030059091, "grad_norm": 0.21107324957847595, "learning_rate": 2e-07, "loss": 0.0899, "step": 1013 }, { "clip_ratio/high_max": 0.002142695702787023, "clip_ratio/high_mean": 0.0008224787079598173, "clip_ratio/low_mean": 0.0007909259602456586, "clip_ratio/low_min": 6.9275625719456e-05, "clip_ratio/region_mean": 0.0016134046236402355, "epoch": 0.09463976340059149, "grad_norm": 0.17175063490867615, "learning_rate": 2e-07, "loss": 0.0006, "step": 1014 }, { "clip_ratio/high_max": 0.0017917125369422138, "clip_ratio/high_mean": 0.0007654903401999036, "clip_ratio/low_mean": 0.0008358193044841755, "clip_ratio/low_min": 5.6921004215837456e-05, "clip_ratio/region_mean": 0.001601309617399238, "epoch": 0.09473309650059208, "grad_norm": 0.16896438598632812, "learning_rate": 2e-07, "loss": 0.0602, "step": 1015 }, { "clip_ratio/high_max": 0.0015088408872543368, "clip_ratio/high_mean": 0.000706128064848599, "clip_ratio/low_mean": 0.0007358761886280263, "clip_ratio/low_min": 6.021507488185307e-05, "clip_ratio/region_mean": 0.001442004260752583, "epoch": 0.09482642960059266, "grad_norm": 0.18385401368141174, "learning_rate": 2e-07, "loss": 0.0075, "step": 1016 }, { "clip_ratio/high_max": 0.0018926592892967165, "clip_ratio/high_mean": 0.00077394840263878, "clip_ratio/low_mean": 0.0006563860533788102, "clip_ratio/low_min": 6.40326338725572e-05, "clip_ratio/region_mean": 0.0014303344651125371, "epoch": 0.09491976270059325, "grad_norm": 0.1653454303741455, "learning_rate": 2e-07, "loss": 0.0077, "step": 1017 }, { "clip_ratio/high_max": 0.0017751133636920713, "clip_ratio/high_mean": 0.0007377612637355924, "clip_ratio/low_mean": 0.0007535277945862617, "clip_ratio/low_min": 9.821678941079881e-05, "clip_ratio/region_mean": 0.0014912890328560024, "epoch": 0.09501309580059383, "grad_norm": 0.1792379915714264, "learning_rate": 2e-07, "loss": 0.0602, "step": 1018 }, { "clip_ratio/high_max": 0.0018354958374402486, "clip_ratio/high_mean": 0.0007327909297600854, "clip_ratio/low_mean": 0.0007791606894897996, "clip_ratio/low_min": 9.465854100199067e-05, "clip_ratio/region_mean": 0.0015119516210688744, "epoch": 0.09510642890059441, "grad_norm": 0.22554445266723633, "learning_rate": 2e-07, "loss": 0.0382, "step": 1019 }, { "clip_ratio/high_max": 0.0019489285623421893, "clip_ratio/high_mean": 0.0007125211504899198, "clip_ratio/low_mean": 0.0006913592633281951, "clip_ratio/low_min": 7.708815246587619e-05, "clip_ratio/region_mean": 0.001403880407451652, "epoch": 0.095199762000595, "grad_norm": 0.4581732451915741, "learning_rate": 2e-07, "loss": 0.005, "step": 1020 }, { "clip_ratio/high_max": 0.0018844361020455835, "clip_ratio/high_mean": 0.0007366990582795552, "clip_ratio/low_mean": 0.000903823612134147, "clip_ratio/low_min": 1.6237983800238e-05, "clip_ratio/region_mean": 0.0016405226597271394, "epoch": 0.09529309510059558, "grad_norm": 0.17599578201770782, "learning_rate": 2e-07, "loss": 0.0516, "step": 1021 }, { "clip_ratio/high_max": 0.0019190964958397672, "clip_ratio/high_mean": 0.0007706724791205488, "clip_ratio/low_mean": 0.0008597128071414772, "clip_ratio/low_min": 2.5530397579132114e-05, "clip_ratio/region_mean": 0.0016303852535202168, "epoch": 0.09538642820059616, "grad_norm": 0.19194719195365906, "learning_rate": 2e-07, "loss": 0.0501, "step": 1022 }, { "clip_ratio/high_max": 0.0024685868629603647, "clip_ratio/high_mean": 0.0008620815242466051, "clip_ratio/low_mean": 0.0007094381926435744, "clip_ratio/low_min": 4.1917113776435144e-05, "clip_ratio/region_mean": 0.0015715197296231054, "epoch": 0.09547976130059675, "grad_norm": 0.187911719083786, "learning_rate": 2e-07, "loss": 0.0019, "step": 1023 }, { "clip_ratio/high_max": 0.0018579211828182451, "clip_ratio/high_mean": 0.0008154256738635013, "clip_ratio/low_mean": 0.0007594119797431631, "clip_ratio/low_min": 2.96014804916922e-05, "clip_ratio/region_mean": 0.0015748375881230459, "epoch": 0.09557309440059733, "grad_norm": 0.1773652583360672, "learning_rate": 2e-07, "loss": 0.0377, "step": 1024 }, { "clip_ratio/high_max": 0.0013976690242998302, "clip_ratio/high_mean": 0.0005390550768424873, "clip_ratio/low_mean": 0.000551065691979602, "clip_ratio/low_min": 2.2425547285820358e-05, "clip_ratio/region_mean": 0.0010901207533606794, "completions/clipped_ratio": 0.02099609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 690.4118041992188, "completions/mean_terminated_length": 617.3742065429688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.09566642750059791, "grad_norm": 0.13995230197906494, "learning_rate": 2e-07, "loss": 0.0476, "num_tokens": 773930701.0, "reward": 0.5898699164390564, "reward_std": 0.17693860828876495, "rewards/simpleverify_reward/mean": 0.5898699164390564, "rewards/simpleverify_reward/std": 0.4918592870235443, "step": 1025 }, { "clip_ratio/high_max": 0.0013708305050386116, "clip_ratio/high_mean": 0.0005564877110373345, "clip_ratio/low_mean": 0.0005445496390166227, "clip_ratio/low_min": 3.373268373252358e-05, "clip_ratio/region_mean": 0.0011010373491444625, "epoch": 0.0957597606005985, "grad_norm": 0.14929437637329102, "learning_rate": 2e-07, "loss": 0.0585, "step": 1026 }, { "clip_ratio/high_max": 0.0017153605149360374, "clip_ratio/high_mean": 0.0006554219262397964, "clip_ratio/low_mean": 0.0004980738613085123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011534957666299306, "epoch": 0.09585309370059908, "grad_norm": 0.14730463922023773, "learning_rate": 2e-07, "loss": -0.0194, "step": 1027 }, { "clip_ratio/high_max": 0.00166615027046646, "clip_ratio/high_mean": 0.0007327789026021492, "clip_ratio/low_mean": 0.0007074002787703648, "clip_ratio/low_min": 3.99471064156387e-05, "clip_ratio/region_mean": 0.0014401792286662385, "epoch": 0.09594642680059967, "grad_norm": 0.15482264757156372, "learning_rate": 2e-07, "loss": 0.0075, "step": 1028 }, { "clip_ratio/high_max": 0.0014134803204797208, "clip_ratio/high_mean": 0.0005714197950510425, "clip_ratio/low_mean": 0.000509914338181261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010813341032189783, "epoch": 0.09603975990060025, "grad_norm": 0.13401024043560028, "learning_rate": 2e-07, "loss": 0.0361, "step": 1029 }, { "clip_ratio/high_max": 0.0015113866866158787, "clip_ratio/high_mean": 0.0005519047317648074, "clip_ratio/low_mean": 0.0006340970085148001, "clip_ratio/low_min": 5.568407777900575e-05, "clip_ratio/region_mean": 0.0011860017657454591, "epoch": 0.09613309300060083, "grad_norm": 0.15947654843330383, "learning_rate": 2e-07, "loss": 0.0641, "step": 1030 }, { "clip_ratio/high_max": 0.0017595314857317135, "clip_ratio/high_mean": 0.0006714881651532778, "clip_ratio/low_mean": 0.0005206111136430991, "clip_ratio/low_min": 4.037277994939359e-05, "clip_ratio/region_mean": 0.0011920992837985978, "epoch": 0.09622642610060142, "grad_norm": 0.14622628688812256, "learning_rate": 2e-07, "loss": 0.0063, "step": 1031 }, { "clip_ratio/high_max": 0.0016829784653964452, "clip_ratio/high_mean": 0.000701106313499622, "clip_ratio/low_mean": 0.0005317854620443541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012328918164712377, "epoch": 0.096319759200602, "grad_norm": 0.15084731578826904, "learning_rate": 2e-07, "loss": 0.0151, "step": 1032 }, { "clip_ratio/high_max": 0.001985624199733138, "clip_ratio/high_mean": 0.0006370335358951706, "clip_ratio/low_mean": 0.000595378189245821, "clip_ratio/low_min": 3.0260463972808793e-05, "clip_ratio/region_mean": 0.0012324117051321082, "epoch": 0.09641309230060258, "grad_norm": 0.1505519449710846, "learning_rate": 2e-07, "loss": 0.0456, "step": 1033 }, { "clip_ratio/high_max": 0.0015977936636772938, "clip_ratio/high_mean": 0.0006431656383938389, "clip_ratio/low_mean": 0.0004967154718542588, "clip_ratio/low_min": 3.102475056948606e-05, "clip_ratio/region_mean": 0.0011398811402614228, "epoch": 0.09650642540060317, "grad_norm": 0.1491953581571579, "learning_rate": 2e-07, "loss": 0.0158, "step": 1034 }, { "clip_ratio/high_max": 0.0015207495071081212, "clip_ratio/high_mean": 0.0006131693971838104, "clip_ratio/low_mean": 0.0005767561560787726, "clip_ratio/low_min": 1.4010312042955775e-05, "clip_ratio/region_mean": 0.001189925535072689, "epoch": 0.09659975850060375, "grad_norm": 0.15639768540859222, "learning_rate": 2e-07, "loss": 0.0477, "step": 1035 }, { "clip_ratio/high_max": 0.0015935265109874308, "clip_ratio/high_mean": 0.0005967850875094882, "clip_ratio/low_mean": 0.0005910684585614945, "clip_ratio/low_min": 4.3231441850366537e-05, "clip_ratio/region_mean": 0.001187853551527951, "epoch": 0.09669309160060433, "grad_norm": 0.18650835752487183, "learning_rate": 2e-07, "loss": 0.069, "step": 1036 }, { "clip_ratio/high_max": 0.0017091749359678943, "clip_ratio/high_mean": 0.0007145257477532141, "clip_ratio/low_mean": 0.0005595102029474219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001274035923415795, "epoch": 0.09678642470060492, "grad_norm": 0.24907439947128296, "learning_rate": 2e-07, "loss": -0.0216, "step": 1037 }, { "clip_ratio/high_max": 0.0017085937906813342, "clip_ratio/high_mean": 0.0005706035117327701, "clip_ratio/low_mean": 0.0005487108974193688, "clip_ratio/low_min": 2.8067012863175478e-05, "clip_ratio/region_mean": 0.0011193144055141602, "epoch": 0.0968797578006055, "grad_norm": 0.3066822290420532, "learning_rate": 2e-07, "loss": 0.0485, "step": 1038 }, { "clip_ratio/high_max": 0.0019128457097394858, "clip_ratio/high_mean": 0.0006901382630530861, "clip_ratio/low_mean": 0.0005473594628710998, "clip_ratio/low_min": 2.7933224373555277e-05, "clip_ratio/region_mean": 0.0012374977413855959, "epoch": 0.09697309090060609, "grad_norm": 0.14152072370052338, "learning_rate": 2e-07, "loss": 0.0242, "step": 1039 }, { "clip_ratio/high_max": 0.0019373311988601927, "clip_ratio/high_mean": 0.0007144366827560589, "clip_ratio/low_mean": 0.0006691817761748098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001383618469844805, "epoch": 0.09706642400060667, "grad_norm": 0.15431392192840576, "learning_rate": 2e-07, "loss": 0.0244, "step": 1040 }, { "clip_ratio/high_max": 0.0015983971788955387, "clip_ratio/high_mean": 0.0006523084957734682, "clip_ratio/low_mean": 0.0004925415369143593, "clip_ratio/low_min": 1.3309198948263656e-05, "clip_ratio/region_mean": 0.001144850073615089, "epoch": 0.09715975710060724, "grad_norm": 0.12544314563274384, "learning_rate": 2e-07, "loss": 0.0304, "step": 1041 }, { "clip_ratio/high_max": 0.0016927644610404968, "clip_ratio/high_mean": 0.0006322097342490451, "clip_ratio/low_mean": 0.0005311445547704352, "clip_ratio/low_min": 3.0010249247425236e-05, "clip_ratio/region_mean": 0.0011633543072093744, "epoch": 0.09725309020060784, "grad_norm": 0.15861967206001282, "learning_rate": 2e-07, "loss": 0.037, "step": 1042 }, { "clip_ratio/high_max": 0.0019728181177924853, "clip_ratio/high_mean": 0.0007186180155258626, "clip_ratio/low_mean": 0.000612304139394837, "clip_ratio/low_min": 1.2303149560466409e-05, "clip_ratio/region_mean": 0.0013309221394592896, "epoch": 0.09734642330060841, "grad_norm": 0.1456659436225891, "learning_rate": 2e-07, "loss": 0.0043, "step": 1043 }, { "clip_ratio/high_max": 0.001429147527232999, "clip_ratio/high_mean": 0.0005652504569297889, "clip_ratio/low_mean": 0.0005678042971339892, "clip_ratio/low_min": 1.3191220205044374e-05, "clip_ratio/region_mean": 0.0011330547458783258, "epoch": 0.09743975640060899, "grad_norm": 0.31245291233062744, "learning_rate": 2e-07, "loss": 0.0791, "step": 1044 }, { "clip_ratio/high_max": 0.001503012503235368, "clip_ratio/high_mean": 0.0005738440486311447, "clip_ratio/low_mean": 0.0005912666965741664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00116511074884329, "epoch": 0.09753308950060958, "grad_norm": 0.13973017036914825, "learning_rate": 2e-07, "loss": 0.0327, "step": 1045 }, { "clip_ratio/high_max": 0.0016222177218878642, "clip_ratio/high_mean": 0.0006519748549180804, "clip_ratio/low_mean": 0.0004923872584186029, "clip_ratio/low_min": 2.0903729819110595e-05, "clip_ratio/region_mean": 0.0011443621515354607, "epoch": 0.09762642260061016, "grad_norm": 0.12260133773088455, "learning_rate": 2e-07, "loss": -0.0103, "step": 1046 }, { "clip_ratio/high_max": 0.0014562792002834613, "clip_ratio/high_mean": 0.0006700640396957169, "clip_ratio/low_mean": 0.0005166578301896152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011867218890984077, "epoch": 0.09771975570061076, "grad_norm": 0.15883275866508484, "learning_rate": 2e-07, "loss": 0.0343, "step": 1047 }, { "clip_ratio/high_max": 0.0017374639755871613, "clip_ratio/high_mean": 0.0006556741409440292, "clip_ratio/low_mean": 0.0005638174307023291, "clip_ratio/low_min": 2.913617299782345e-05, "clip_ratio/region_mean": 0.001219491594383726, "epoch": 0.09781308880061133, "grad_norm": 0.1466199904680252, "learning_rate": 2e-07, "loss": 0.0112, "step": 1048 }, { "clip_ratio/high_max": 0.0017404112077201717, "clip_ratio/high_mean": 0.0006267549870244693, "clip_ratio/low_mean": 0.0005989712371956557, "clip_ratio/low_min": 1.059322039509425e-05, "clip_ratio/region_mean": 0.001225726242410019, "epoch": 0.09790642190061191, "grad_norm": 0.152729794383049, "learning_rate": 2e-07, "loss": -0.0074, "step": 1049 }, { "clip_ratio/high_max": 0.0017583181615918875, "clip_ratio/high_mean": 0.0006218709204404149, "clip_ratio/low_mean": 0.0004920797855447745, "clip_ratio/low_min": 4.138556505495217e-05, "clip_ratio/region_mean": 0.0011139507259940729, "epoch": 0.0979997550006125, "grad_norm": 0.1558413803577423, "learning_rate": 2e-07, "loss": -0.0068, "step": 1050 }, { "clip_ratio/high_max": 0.001681872905464843, "clip_ratio/high_mean": 0.0006473012481365004, "clip_ratio/low_mean": 0.0005469163843372371, "clip_ratio/low_min": 1.3194005077821203e-05, "clip_ratio/region_mean": 0.0011942176133743487, "epoch": 0.09809308810061308, "grad_norm": 0.15378768742084503, "learning_rate": 2e-07, "loss": 0.0227, "step": 1051 }, { "clip_ratio/high_max": 0.0016823123987705912, "clip_ratio/high_mean": 0.0006352785358103574, "clip_ratio/low_mean": 0.0005787915106338914, "clip_ratio/low_min": 3.9322143493336625e-05, "clip_ratio/region_mean": 0.0012140700455347542, "epoch": 0.09818642120061366, "grad_norm": 0.18295440077781677, "learning_rate": 2e-07, "loss": 0.0578, "step": 1052 }, { "clip_ratio/high_max": 0.00156029951540404, "clip_ratio/high_mean": 0.0006158499127195682, "clip_ratio/low_mean": 0.000605827273830073, "clip_ratio/low_min": 7.475710754079046e-05, "clip_ratio/region_mean": 0.00122167719018762, "epoch": 0.09827975430061425, "grad_norm": 0.185609370470047, "learning_rate": 2e-07, "loss": 0.052, "step": 1053 }, { "clip_ratio/high_max": 0.0017839683241618332, "clip_ratio/high_mean": 0.000639261832475313, "clip_ratio/low_mean": 0.000535863864570274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011751256861316506, "epoch": 0.09837308740061483, "grad_norm": 0.16138224303722382, "learning_rate": 2e-07, "loss": 0.0393, "step": 1054 }, { "clip_ratio/high_max": 0.0014318059402285144, "clip_ratio/high_mean": 0.0005877007552044233, "clip_ratio/low_mean": 0.0005861316112714121, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011738323482859414, "epoch": 0.09846642050061541, "grad_norm": 0.15012574195861816, "learning_rate": 2e-07, "loss": 0.0659, "step": 1055 }, { "clip_ratio/high_max": 0.0015647463078494184, "clip_ratio/high_mean": 0.0005451857987281983, "clip_ratio/low_mean": 0.000563231964406441, "clip_ratio/low_min": 2.1317902792361565e-05, "clip_ratio/region_mean": 0.0011084177385782823, "epoch": 0.098559753600616, "grad_norm": 0.14288288354873657, "learning_rate": 2e-07, "loss": 0.0694, "step": 1056 }, { "clip_ratio/high_max": 0.00181792484363541, "clip_ratio/high_mean": 0.0006802855314163025, "clip_ratio/low_mean": 0.0005797089024781599, "clip_ratio/low_min": 9.185772796627134e-06, "clip_ratio/region_mean": 0.0012599944129760843, "epoch": 0.09865308670061658, "grad_norm": 0.1612895429134369, "learning_rate": 2e-07, "loss": 0.0007, "step": 1057 }, { "clip_ratio/high_max": 0.0016530869870621245, "clip_ratio/high_mean": 0.0006721153831676929, "clip_ratio/low_mean": 0.0005238746107352199, "clip_ratio/low_min": 2.754517481662333e-05, "clip_ratio/region_mean": 0.001195989996631397, "epoch": 0.09874641980061717, "grad_norm": 0.15619292855262756, "learning_rate": 2e-07, "loss": -0.0041, "step": 1058 }, { "clip_ratio/high_max": 0.0014645564697275404, "clip_ratio/high_mean": 0.0005740648603023146, "clip_ratio/low_mean": 0.0004823029157705605, "clip_ratio/low_min": 1.414667258359259e-05, "clip_ratio/region_mean": 0.0010563677751633804, "epoch": 0.09883975290061775, "grad_norm": 0.13636276125907898, "learning_rate": 2e-07, "loss": 0.0267, "step": 1059 }, { "clip_ratio/high_max": 0.001571941982547287, "clip_ratio/high_mean": 0.000631949253147468, "clip_ratio/low_mean": 0.0006597009651159169, "clip_ratio/low_min": 2.4781918909866363e-05, "clip_ratio/region_mean": 0.0012916501982545014, "epoch": 0.09893308600061833, "grad_norm": 0.14225031435489655, "learning_rate": 2e-07, "loss": 0.0456, "step": 1060 }, { "clip_ratio/high_max": 0.0019511353857524227, "clip_ratio/high_mean": 0.0007577137312182458, "clip_ratio/low_mean": 0.0006999586876190733, "clip_ratio/low_min": 3.5877712434739806e-05, "clip_ratio/region_mean": 0.001457672413380351, "epoch": 0.09902641910061892, "grad_norm": 0.1480303257703781, "learning_rate": 2e-07, "loss": 0.0174, "step": 1061 }, { "clip_ratio/high_max": 0.0014545251797244418, "clip_ratio/high_mean": 0.0005318421599440626, "clip_ratio/low_mean": 0.0005600913987109379, "clip_ratio/low_min": 2.7184005375602283e-05, "clip_ratio/region_mean": 0.001091933521820465, "epoch": 0.0991197522006195, "grad_norm": 0.1296086609363556, "learning_rate": 2e-07, "loss": 0.0379, "step": 1062 }, { "clip_ratio/high_max": 0.001769831585988868, "clip_ratio/high_mean": 0.0006847328713774914, "clip_ratio/low_mean": 0.0005841700203745859, "clip_ratio/low_min": 7.000138248258736e-05, "clip_ratio/region_mean": 0.0012689029244938865, "epoch": 0.09921308530062008, "grad_norm": 0.14934933185577393, "learning_rate": 2e-07, "loss": 0.0433, "step": 1063 }, { "clip_ratio/high_max": 0.0016910056801862083, "clip_ratio/high_mean": 0.0006449812690334511, "clip_ratio/low_mean": 0.00047210925731633324, "clip_ratio/low_min": 2.206668705184711e-05, "clip_ratio/region_mean": 0.0011170905527251307, "epoch": 0.09930641840062067, "grad_norm": 0.1892910897731781, "learning_rate": 2e-07, "loss": 0.0391, "step": 1064 }, { "clip_ratio/high_max": 0.0017985400481848046, "clip_ratio/high_mean": 0.0006342442629829748, "clip_ratio/low_mean": 0.0006132221387815662, "clip_ratio/low_min": 2.548469365137862e-05, "clip_ratio/region_mean": 0.001247466429049382, "epoch": 0.09939975150062125, "grad_norm": 0.16051152348518372, "learning_rate": 2e-07, "loss": 0.0457, "step": 1065 }, { "clip_ratio/high_max": 0.0015568154776701704, "clip_ratio/high_mean": 0.0006393065114025376, "clip_ratio/low_mean": 0.0004391210504763876, "clip_ratio/low_min": 8.713230272405781e-06, "clip_ratio/region_mean": 0.0010784275600599358, "epoch": 0.09949308460062183, "grad_norm": 1.3166816234588623, "learning_rate": 2e-07, "loss": -0.013, "step": 1066 }, { "clip_ratio/high_max": 0.001725747097225394, "clip_ratio/high_mean": 0.0007226662546599982, "clip_ratio/low_mean": 0.0005737946212320821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012964608504262287, "epoch": 0.09958641770062242, "grad_norm": 0.2146015614271164, "learning_rate": 2e-07, "loss": 0.0369, "step": 1067 }, { "clip_ratio/high_max": 0.0015932645401335321, "clip_ratio/high_mean": 0.0006487094378826441, "clip_ratio/low_mean": 0.0006255900807445869, "clip_ratio/low_min": 2.4414345716650132e-05, "clip_ratio/region_mean": 0.0012742995240841992, "epoch": 0.099679750800623, "grad_norm": 0.16877827048301697, "learning_rate": 2e-07, "loss": 0.0135, "step": 1068 }, { "clip_ratio/high_max": 0.0017049649941327516, "clip_ratio/high_mean": 0.0006027634099154966, "clip_ratio/low_mean": 0.000525465987266216, "clip_ratio/low_min": 1.828036693041213e-05, "clip_ratio/region_mean": 0.001128229410824133, "epoch": 0.09977308390062359, "grad_norm": 0.14494383335113525, "learning_rate": 2e-07, "loss": 0.033, "step": 1069 }, { "clip_ratio/high_max": 0.0019533320773916785, "clip_ratio/high_mean": 0.0006804655768064549, "clip_ratio/low_mean": 0.0005800279086543014, "clip_ratio/low_min": 3.0525213333021384e-05, "clip_ratio/region_mean": 0.0012604934636328835, "epoch": 0.09986641700062417, "grad_norm": 0.18165990710258484, "learning_rate": 2e-07, "loss": 0.0411, "step": 1070 }, { "clip_ratio/high_max": 0.0017516920124762692, "clip_ratio/high_mean": 0.0006923058917891467, "clip_ratio/low_mean": 0.0005152330304554198, "clip_ratio/low_min": 1.0527201993681956e-05, "clip_ratio/region_mean": 0.001207538924063556, "epoch": 0.09995975010062474, "grad_norm": 0.1563708484172821, "learning_rate": 2e-07, "loss": 0.0188, "step": 1071 }, { "clip_ratio/high_max": 0.002205959754064679, "clip_ratio/high_mean": 0.0008133001065289136, "clip_ratio/low_mean": 0.0005205037459745654, "clip_ratio/low_min": 3.971318119511125e-05, "clip_ratio/region_mean": 0.0013338038334040903, "epoch": 0.10005308320062534, "grad_norm": 0.16933055222034454, "learning_rate": 2e-07, "loss": 0.0096, "step": 1072 }, { "clip_ratio/high_max": 0.0016144393266586121, "clip_ratio/high_mean": 0.0006797967826059903, "clip_ratio/low_mean": 0.000625458856120531, "clip_ratio/low_min": 5.390580008679535e-05, "clip_ratio/region_mean": 0.0013052556387265213, "epoch": 0.10014641630062592, "grad_norm": 0.15998722612857819, "learning_rate": 2e-07, "loss": 0.053, "step": 1073 }, { "clip_ratio/high_max": 0.001618497452000156, "clip_ratio/high_mean": 0.0006795561039325548, "clip_ratio/low_mean": 0.0005770796806245926, "clip_ratio/low_min": 1.8051649931294378e-05, "clip_ratio/region_mean": 0.0012566357836476527, "epoch": 0.1002397494006265, "grad_norm": 0.46215784549713135, "learning_rate": 2e-07, "loss": -0.0022, "step": 1074 }, { "clip_ratio/high_max": 0.0014677761646453291, "clip_ratio/high_mean": 0.0006326147122308612, "clip_ratio/low_mean": 0.0006362157328112517, "clip_ratio/low_min": 8.827174588077469e-05, "clip_ratio/region_mean": 0.0012688304268522188, "epoch": 0.10033308250062709, "grad_norm": 0.14772547781467438, "learning_rate": 2e-07, "loss": 0.024, "step": 1075 }, { "clip_ratio/high_max": 0.0017867706628749147, "clip_ratio/high_mean": 0.0007088584261509823, "clip_ratio/low_mean": 0.0006073488439142238, "clip_ratio/low_min": 6.200931966304779e-05, "clip_ratio/region_mean": 0.0013162072536943015, "epoch": 0.10042641560062766, "grad_norm": 0.16033732891082764, "learning_rate": 2e-07, "loss": 0.0076, "step": 1076 }, { "clip_ratio/high_max": 0.0019086736101598945, "clip_ratio/high_mean": 0.0007327171297220048, "clip_ratio/low_mean": 0.0005449875043268548, "clip_ratio/low_min": 1.1973180335189682e-05, "clip_ratio/region_mean": 0.0012777046576957218, "epoch": 0.10051974870062824, "grad_norm": 0.14922833442687988, "learning_rate": 2e-07, "loss": 0.0348, "step": 1077 }, { "clip_ratio/high_max": 0.001682754336798098, "clip_ratio/high_mean": 0.0006540556223626481, "clip_ratio/low_mean": 0.0006115350752224913, "clip_ratio/low_min": 3.2965815080387983e-05, "clip_ratio/region_mean": 0.001265590686671203, "epoch": 0.10061308180062883, "grad_norm": 0.15510930120944977, "learning_rate": 2e-07, "loss": 0.0303, "step": 1078 }, { "clip_ratio/high_max": 0.0016412366057920735, "clip_ratio/high_mean": 0.0006496340547528234, "clip_ratio/low_mean": 0.000531300976035709, "clip_ratio/low_min": 6.883260084578069e-06, "clip_ratio/region_mean": 0.0011809350253315642, "epoch": 0.10070641490062941, "grad_norm": 0.160722553730011, "learning_rate": 2e-07, "loss": 0.0446, "step": 1079 }, { "clip_ratio/high_max": 0.0018106453208019957, "clip_ratio/high_mean": 0.0006634130095335422, "clip_ratio/low_mean": 0.00048186040521613904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011452734397607855, "epoch": 0.10079974800063, "grad_norm": 0.16193906962871552, "learning_rate": 2e-07, "loss": 0.0316, "step": 1080 }, { "clip_ratio/high_max": 0.001604195716936374, "clip_ratio/high_mean": 0.0006386842724168673, "clip_ratio/low_mean": 0.0005794836188215413, "clip_ratio/low_min": 3.738424493349157e-05, "clip_ratio/region_mean": 0.0012181678976048715, "epoch": 0.10089308110063058, "grad_norm": 0.15314896404743195, "learning_rate": 2e-07, "loss": 0.0216, "step": 1081 }, { "clip_ratio/high_max": 0.001700196251476882, "clip_ratio/high_mean": 0.0006559405646839878, "clip_ratio/low_mean": 0.000527644129761029, "clip_ratio/low_min": 5.089099067845382e-05, "clip_ratio/region_mean": 0.001183584681712091, "epoch": 0.10098641420063116, "grad_norm": 0.13899900019168854, "learning_rate": 2e-07, "loss": 0.0248, "step": 1082 }, { "clip_ratio/high_max": 0.001700643239018973, "clip_ratio/high_mean": 0.0005800772114525898, "clip_ratio/low_mean": 0.0005704596151190344, "clip_ratio/low_min": 5.239350502961315e-05, "clip_ratio/region_mean": 0.001150536809291225, "epoch": 0.10107974730063175, "grad_norm": 0.14391295611858368, "learning_rate": 2e-07, "loss": 0.0523, "step": 1083 }, { "clip_ratio/high_max": 0.001693254987912951, "clip_ratio/high_mean": 0.0006048928216841887, "clip_ratio/low_mean": 0.0006519196713270503, "clip_ratio/low_min": 5.325371512299171e-05, "clip_ratio/region_mean": 0.001256812502106186, "epoch": 0.10117308040063233, "grad_norm": 0.18438707292079926, "learning_rate": 2e-07, "loss": 0.0216, "step": 1084 }, { "clip_ratio/high_max": 0.0018662692018551752, "clip_ratio/high_mean": 0.0007015835162746953, "clip_ratio/low_mean": 0.0005792635474790586, "clip_ratio/low_min": 2.3260817215486895e-05, "clip_ratio/region_mean": 0.0012808470746676903, "epoch": 0.10126641350063291, "grad_norm": 0.15572968125343323, "learning_rate": 2e-07, "loss": 0.0436, "step": 1085 }, { "clip_ratio/high_max": 0.0014514608592435252, "clip_ratio/high_mean": 0.0005984738600091077, "clip_ratio/low_mean": 0.0005457624638438574, "clip_ratio/low_min": 3.4351396607235074e-05, "clip_ratio/region_mean": 0.0011442363138485234, "epoch": 0.1013597466006335, "grad_norm": 0.16951200366020203, "learning_rate": 2e-07, "loss": 0.0354, "step": 1086 }, { "clip_ratio/high_max": 0.0015592558866046602, "clip_ratio/high_mean": 0.0006010137303746887, "clip_ratio/low_mean": 0.0005154142709216103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011164280003868043, "epoch": 0.10145307970063408, "grad_norm": 0.1528424322605133, "learning_rate": 2e-07, "loss": -0.0017, "step": 1087 }, { "clip_ratio/high_max": 0.001561774492074619, "clip_ratio/high_mean": 0.0006247370829441934, "clip_ratio/low_mean": 0.0006543066174344858, "clip_ratio/low_min": 7.471283925042371e-05, "clip_ratio/region_mean": 0.0012790437140210997, "epoch": 0.10154641280063466, "grad_norm": 0.15505114197731018, "learning_rate": 2e-07, "loss": 0.0563, "step": 1088 }, { "clip_ratio/high_max": 0.001500566337199416, "clip_ratio/high_mean": 0.0005511521485459525, "clip_ratio/low_mean": 0.0007105448621587129, "clip_ratio/low_min": 3.5685592592926696e-05, "clip_ratio/region_mean": 0.0012616970307135489, "epoch": 0.10163974590063525, "grad_norm": 0.1579626351594925, "learning_rate": 2e-07, "loss": 0.0625, "step": 1089 }, { "clip_ratio/high_max": 0.0014233183137548622, "clip_ratio/high_mean": 0.0006094261316320626, "clip_ratio/low_mean": 0.000625259337539319, "clip_ratio/low_min": 9.81181874522008e-05, "clip_ratio/region_mean": 0.001234685470990371, "epoch": 0.10173307900063583, "grad_norm": 0.1554982215166092, "learning_rate": 2e-07, "loss": 0.0133, "step": 1090 }, { "clip_ratio/high_max": 0.0017466573044657707, "clip_ratio/high_mean": 0.0006900563366798451, "clip_ratio/low_mean": 0.000565061139241152, "clip_ratio/low_min": 4.8456347940373234e-05, "clip_ratio/region_mean": 0.001255117473192513, "epoch": 0.10182641210063642, "grad_norm": 0.1822282075881958, "learning_rate": 2e-07, "loss": 0.0283, "step": 1091 }, { "clip_ratio/high_max": 0.001652716462558601, "clip_ratio/high_mean": 0.000627658413577592, "clip_ratio/low_mean": 0.0005839091190864565, "clip_ratio/low_min": 4.170819465798559e-05, "clip_ratio/region_mean": 0.0012115675017412286, "epoch": 0.101919745200637, "grad_norm": 0.16696639358997345, "learning_rate": 2e-07, "loss": 0.0129, "step": 1092 }, { "clip_ratio/high_max": 0.001645483138418058, "clip_ratio/high_mean": 0.000600817336817272, "clip_ratio/low_mean": 0.0005196066249482101, "clip_ratio/low_min": 1.3679142284672707e-05, "clip_ratio/region_mean": 0.0011204239308426622, "epoch": 0.10201307830063758, "grad_norm": 0.1599513441324234, "learning_rate": 2e-07, "loss": 0.0171, "step": 1093 }, { "clip_ratio/high_max": 0.0015299071328627178, "clip_ratio/high_mean": 0.0005785768280475168, "clip_ratio/low_mean": 0.0006065301568014547, "clip_ratio/low_min": 7.88146280683577e-06, "clip_ratio/region_mean": 0.0011851070012198761, "epoch": 0.10210641140063817, "grad_norm": 0.14695550501346588, "learning_rate": 2e-07, "loss": 0.0199, "step": 1094 }, { "clip_ratio/high_max": 0.0019350838447280694, "clip_ratio/high_mean": 0.0007338022969634039, "clip_ratio/low_mean": 0.0006955898188607534, "clip_ratio/low_min": 1.4534884030581452e-05, "clip_ratio/region_mean": 0.0014293921303760726, "epoch": 0.10219974450063875, "grad_norm": 0.16174593567848206, "learning_rate": 2e-07, "loss": 0.083, "step": 1095 }, { "clip_ratio/high_max": 0.001641252307308605, "clip_ratio/high_mean": 0.000620701682237268, "clip_ratio/low_mean": 0.0006615999991481658, "clip_ratio/low_min": 2.306206624780316e-05, "clip_ratio/region_mean": 0.001282301680475939, "epoch": 0.10229307760063933, "grad_norm": 0.14572051167488098, "learning_rate": 2e-07, "loss": 0.0652, "step": 1096 }, { "clip_ratio/high_max": 0.001657885772146983, "clip_ratio/high_mean": 0.0006771035150450189, "clip_ratio/low_mean": 0.0007343262859649258, "clip_ratio/low_min": 4.648864432965638e-05, "clip_ratio/region_mean": 0.0014114297919149976, "epoch": 0.10238641070063992, "grad_norm": 0.6493206024169922, "learning_rate": 2e-07, "loss": 0.0582, "step": 1097 }, { "clip_ratio/high_max": 0.0018525566192693077, "clip_ratio/high_mean": 0.0007341996642935555, "clip_ratio/low_mean": 0.0006568676835740916, "clip_ratio/low_min": 4.4178036205266835e-05, "clip_ratio/region_mean": 0.0013910673442296684, "epoch": 0.1024797438006405, "grad_norm": 0.1657799482345581, "learning_rate": 2e-07, "loss": 0.0576, "step": 1098 }, { "clip_ratio/high_max": 0.0016571992855460849, "clip_ratio/high_mean": 0.0006144828257674817, "clip_ratio/low_mean": 0.0007577101841889089, "clip_ratio/low_min": 5.179845084057888e-05, "clip_ratio/region_mean": 0.001372193048155168, "epoch": 0.10257307690064109, "grad_norm": 0.20611099898815155, "learning_rate": 2e-07, "loss": 0.0781, "step": 1099 }, { "clip_ratio/high_max": 0.001434320127373212, "clip_ratio/high_mean": 0.0005880656754015945, "clip_ratio/low_mean": 0.0006825906748417765, "clip_ratio/low_min": 3.599530327846878e-05, "clip_ratio/region_mean": 0.0012706563466053922, "epoch": 0.10266641000064167, "grad_norm": 0.17862387001514435, "learning_rate": 2e-07, "loss": 0.0793, "step": 1100 }, { "clip_ratio/high_max": 0.0019495309315971099, "clip_ratio/high_mean": 0.000805816613137722, "clip_ratio/low_mean": 0.0005715735478588613, "clip_ratio/low_min": 4.104381787328748e-05, "clip_ratio/region_mean": 0.0013773901591775939, "epoch": 0.10275974310064225, "grad_norm": 0.22093947231769562, "learning_rate": 2e-07, "loss": -0.0076, "step": 1101 }, { "clip_ratio/high_max": 0.0019233182829339057, "clip_ratio/high_mean": 0.0007954319789860165, "clip_ratio/low_mean": 0.000625573087745579, "clip_ratio/low_min": 2.0298797608120367e-05, "clip_ratio/region_mean": 0.0014210050358087756, "epoch": 0.10285307620064284, "grad_norm": 0.19207599759101868, "learning_rate": 2e-07, "loss": 0.0078, "step": 1102 }, { "clip_ratio/high_max": 0.0015221704234136268, "clip_ratio/high_mean": 0.0005945479224465089, "clip_ratio/low_mean": 0.0006161075580166653, "clip_ratio/low_min": 2.7014967599825468e-05, "clip_ratio/region_mean": 0.0012106554822821636, "epoch": 0.10294640930064342, "grad_norm": 0.1413605958223343, "learning_rate": 2e-07, "loss": 0.041, "step": 1103 }, { "clip_ratio/high_max": 0.0016122898141475162, "clip_ratio/high_mean": 0.0006016110264681629, "clip_ratio/low_mean": 0.0006949706130399136, "clip_ratio/low_min": 1.769786285876762e-05, "clip_ratio/region_mean": 0.0012965816567884758, "epoch": 0.103039742400644, "grad_norm": 0.1498638540506363, "learning_rate": 2e-07, "loss": 0.0548, "step": 1104 }, { "clip_ratio/high_max": 0.0017425209625798743, "clip_ratio/high_mean": 0.0006616099690290866, "clip_ratio/low_mean": 0.0006061206313461298, "clip_ratio/low_min": 1.1741499292838853e-05, "clip_ratio/region_mean": 0.0012677305967372376, "epoch": 0.10313307550064459, "grad_norm": 0.15358155965805054, "learning_rate": 2e-07, "loss": -0.0052, "step": 1105 }, { "clip_ratio/high_max": 0.0017331620183540508, "clip_ratio/high_mean": 0.0007212812506622868, "clip_ratio/low_mean": 0.0007488276642106939, "clip_ratio/low_min": 3.766162717511179e-05, "clip_ratio/region_mean": 0.0014701088948640972, "epoch": 0.10322640860064516, "grad_norm": 0.3596377670764923, "learning_rate": 2e-07, "loss": 0.0485, "step": 1106 }, { "clip_ratio/high_max": 0.0016821270546643063, "clip_ratio/high_mean": 0.0006388401106960373, "clip_ratio/low_mean": 0.0006594250371563248, "clip_ratio/low_min": 4.4572099795914255e-05, "clip_ratio/region_mean": 0.0012982651242054999, "epoch": 0.10331974170064574, "grad_norm": 0.14002206921577454, "learning_rate": 2e-07, "loss": 0.0356, "step": 1107 }, { "clip_ratio/high_max": 0.0016101407418318558, "clip_ratio/high_mean": 0.0005903952160224435, "clip_ratio/low_mean": 0.0007644207107659895, "clip_ratio/low_min": 9.530784063827014e-05, "clip_ratio/region_mean": 0.0013548158967751078, "epoch": 0.10341307480064633, "grad_norm": 0.17775137722492218, "learning_rate": 2e-07, "loss": 0.1128, "step": 1108 }, { "clip_ratio/high_max": 0.0017406380939064547, "clip_ratio/high_mean": 0.0007249852251334232, "clip_ratio/low_mean": 0.0006339689480228117, "clip_ratio/low_min": 5.9723689446400385e-05, "clip_ratio/region_mean": 0.0013589541449618991, "epoch": 0.10350640790064691, "grad_norm": 0.19935919344425201, "learning_rate": 2e-07, "loss": 0.0237, "step": 1109 }, { "clip_ratio/high_max": 0.0019679307042679284, "clip_ratio/high_mean": 0.0006746289054717636, "clip_ratio/low_mean": 0.0006979214067541761, "clip_ratio/low_min": 5.121557023812784e-05, "clip_ratio/region_mean": 0.0013725503122259397, "epoch": 0.1035997410006475, "grad_norm": 0.23512428998947144, "learning_rate": 2e-07, "loss": 0.0148, "step": 1110 }, { "clip_ratio/high_max": 0.0017864119927253341, "clip_ratio/high_mean": 0.0006599070266020135, "clip_ratio/low_mean": 0.0005742720040871063, "clip_ratio/low_min": 5.224253527558176e-05, "clip_ratio/region_mean": 0.0012341790352365933, "epoch": 0.10369307410064808, "grad_norm": 0.15648329257965088, "learning_rate": 2e-07, "loss": 0.0504, "step": 1111 }, { "clip_ratio/high_max": 0.0016795804840512574, "clip_ratio/high_mean": 0.0006713345701427897, "clip_ratio/low_mean": 0.000683567318446876, "clip_ratio/low_min": 4.23903229602729e-05, "clip_ratio/region_mean": 0.0013549018658522982, "epoch": 0.10378640720064866, "grad_norm": 0.17392602562904358, "learning_rate": 2e-07, "loss": 0.0135, "step": 1112 }, { "clip_ratio/high_max": 0.0017916250071721151, "clip_ratio/high_mean": 0.0007649974313608254, "clip_ratio/low_mean": 0.0006185337115311995, "clip_ratio/low_min": 3.6902183637721464e-05, "clip_ratio/region_mean": 0.0013835311292496044, "epoch": 0.10387974030064925, "grad_norm": 0.2709124982357025, "learning_rate": 2e-07, "loss": 0.0237, "step": 1113 }, { "clip_ratio/high_max": 0.0019190239436284173, "clip_ratio/high_mean": 0.0006751879027433461, "clip_ratio/low_mean": 0.0006608903404412558, "clip_ratio/low_min": 8.29456839710474e-05, "clip_ratio/region_mean": 0.0013360782431846019, "epoch": 0.10397307340064983, "grad_norm": 0.18092484772205353, "learning_rate": 2e-07, "loss": 0.0423, "step": 1114 }, { "clip_ratio/high_max": 0.0014537269635184202, "clip_ratio/high_mean": 0.0006930093031769502, "clip_ratio/low_mean": 0.0007284347866516327, "clip_ratio/low_min": 4.047339734825073e-05, "clip_ratio/region_mean": 0.0014214440707291942, "epoch": 0.10406640650065041, "grad_norm": 0.1914963573217392, "learning_rate": 2e-07, "loss": 0.1066, "step": 1115 }, { "clip_ratio/high_max": 0.0016021225055737887, "clip_ratio/high_mean": 0.0006459154392359778, "clip_ratio/low_mean": 0.0006769977626390755, "clip_ratio/low_min": 0.0001051740100592724, "clip_ratio/region_mean": 0.0013229132491687778, "epoch": 0.104159739600651, "grad_norm": 0.2495952844619751, "learning_rate": 2e-07, "loss": 0.0563, "step": 1116 }, { "clip_ratio/high_max": 0.0019777498237090185, "clip_ratio/high_mean": 0.0007893728088674834, "clip_ratio/low_mean": 0.0006029953219695017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013923681581218261, "epoch": 0.10425307270065158, "grad_norm": 0.16072824597358704, "learning_rate": 2e-07, "loss": -0.0176, "step": 1117 }, { "clip_ratio/high_max": 0.0018720912412391044, "clip_ratio/high_mean": 0.0006642057987846783, "clip_ratio/low_mean": 0.0006460151471401332, "clip_ratio/low_min": 1.4228799045667984e-05, "clip_ratio/region_mean": 0.0013102209304634016, "epoch": 0.10434640580065216, "grad_norm": 0.16775690019130707, "learning_rate": 2e-07, "loss": 0.0311, "step": 1118 }, { "clip_ratio/high_max": 0.001742502186971251, "clip_ratio/high_mean": 0.0007207712133094901, "clip_ratio/low_mean": 0.0006960818463994656, "clip_ratio/low_min": 2.9430421363940695e-05, "clip_ratio/region_mean": 0.001416853083355818, "epoch": 0.10443973890065275, "grad_norm": 0.6444618701934814, "learning_rate": 2e-07, "loss": -0.0063, "step": 1119 }, { "clip_ratio/high_max": 0.00141055361382314, "clip_ratio/high_mean": 0.0006426684703910723, "clip_ratio/low_mean": 0.0005982020120427478, "clip_ratio/low_min": 2.3839796995162033e-05, "clip_ratio/region_mean": 0.0012408705006237142, "epoch": 0.10453307200065333, "grad_norm": 0.18336622416973114, "learning_rate": 2e-07, "loss": -0.0052, "step": 1120 }, { "clip_ratio/high_max": 0.0017139737574325409, "clip_ratio/high_mean": 0.0007289939931069966, "clip_ratio/low_mean": 0.0005799311393275275, "clip_ratio/low_min": 1.778599835233763e-05, "clip_ratio/region_mean": 0.0013089251406199764, "epoch": 0.10462640510065392, "grad_norm": 0.18604180216789246, "learning_rate": 2e-07, "loss": 0.0319, "step": 1121 }, { "clip_ratio/high_max": 0.0019018440143554471, "clip_ratio/high_mean": 0.0007385402004729258, "clip_ratio/low_mean": 0.0006917208015693177, "clip_ratio/low_min": 7.095688397384947e-05, "clip_ratio/region_mean": 0.0014302609852165915, "epoch": 0.1047197382006545, "grad_norm": 0.15553155541419983, "learning_rate": 2e-07, "loss": 0.0325, "step": 1122 }, { "clip_ratio/high_max": 0.0018789733039739076, "clip_ratio/high_mean": 0.0007071196014294401, "clip_ratio/low_mean": 0.0006871028799650958, "clip_ratio/low_min": 3.462401173237595e-05, "clip_ratio/region_mean": 0.0013942224904894829, "epoch": 0.10481307130065508, "grad_norm": 0.1714010089635849, "learning_rate": 2e-07, "loss": 0.0149, "step": 1123 }, { "clip_ratio/high_max": 0.0016275035777653102, "clip_ratio/high_mean": 0.0007292515292647295, "clip_ratio/low_mean": 0.000791360653238371, "clip_ratio/low_min": 8.394015003432287e-05, "clip_ratio/region_mean": 0.0015206121825031005, "epoch": 0.10490640440065567, "grad_norm": 0.18012382090091705, "learning_rate": 2e-07, "loss": 0.0392, "step": 1124 }, { "clip_ratio/high_max": 0.0018016122849076055, "clip_ratio/high_mean": 0.0008042313565965742, "clip_ratio/low_mean": 0.00068803240719717, "clip_ratio/low_min": 7.830328104319051e-05, "clip_ratio/region_mean": 0.0014922637310519349, "epoch": 0.10499973750065625, "grad_norm": 0.1693631261587143, "learning_rate": 2e-07, "loss": 0.0082, "step": 1125 }, { "clip_ratio/high_max": 0.0016284908888337668, "clip_ratio/high_mean": 0.0006352777154461364, "clip_ratio/low_mean": 0.0007887508982094005, "clip_ratio/low_min": 6.218793532752898e-05, "clip_ratio/region_mean": 0.0014240286000131164, "epoch": 0.10509307060065683, "grad_norm": 0.16774801909923553, "learning_rate": 2e-07, "loss": 0.0638, "step": 1126 }, { "clip_ratio/high_max": 0.0016479643290949753, "clip_ratio/high_mean": 0.0006184939456943539, "clip_ratio/low_mean": 0.0006981065835134359, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001316600522841327, "epoch": 0.10518640370065742, "grad_norm": 0.17191198468208313, "learning_rate": 2e-07, "loss": 0.0398, "step": 1127 }, { "clip_ratio/high_max": 0.0018514398179831915, "clip_ratio/high_mean": 0.000778455576437409, "clip_ratio/low_mean": 0.0006837699947936926, "clip_ratio/low_min": 2.872992536140373e-05, "clip_ratio/region_mean": 0.0014622256094298791, "epoch": 0.105279736800658, "grad_norm": 0.18933561444282532, "learning_rate": 2e-07, "loss": 0.0094, "step": 1128 }, { "clip_ratio/high_max": 0.001868574840045767, "clip_ratio/high_mean": 0.0006547029206558364, "clip_ratio/low_mean": 0.0006830069323768839, "clip_ratio/low_min": 4.284865917725256e-05, "clip_ratio/region_mean": 0.0013377098803175613, "epoch": 0.10537306990065858, "grad_norm": 0.20581325888633728, "learning_rate": 2e-07, "loss": 0.0508, "step": 1129 }, { "clip_ratio/high_max": 0.001731505701172864, "clip_ratio/high_mean": 0.0007339230287470855, "clip_ratio/low_mean": 0.0007276413107319968, "clip_ratio/low_min": 1.3377568393480033e-05, "clip_ratio/region_mean": 0.0014615643449360505, "epoch": 0.10546640300065917, "grad_norm": 0.19026395678520203, "learning_rate": 2e-07, "loss": -0.0188, "step": 1130 }, { "clip_ratio/high_max": 0.0017855706901173107, "clip_ratio/high_mean": 0.0007221573177957907, "clip_ratio/low_mean": 0.000752309193558176, "clip_ratio/low_min": 5.358645375963533e-05, "clip_ratio/region_mean": 0.0014744665131729562, "epoch": 0.10555973610065975, "grad_norm": 0.17866875231266022, "learning_rate": 2e-07, "loss": 0.0544, "step": 1131 }, { "clip_ratio/high_max": 0.0017396401963196695, "clip_ratio/high_mean": 0.0006553527291544015, "clip_ratio/low_mean": 0.0007032078810880193, "clip_ratio/low_min": 5.5048583817551844e-05, "clip_ratio/region_mean": 0.0013585606175183784, "epoch": 0.10565306920066034, "grad_norm": 0.2077604979276657, "learning_rate": 2e-07, "loss": 0.0721, "step": 1132 }, { "clip_ratio/high_max": 0.0019366508204257116, "clip_ratio/high_mean": 0.0007306157513085054, "clip_ratio/low_mean": 0.000763090914915665, "clip_ratio/low_min": 6.938521983101964e-05, "clip_ratio/region_mean": 0.001493706618930446, "epoch": 0.10574640230066092, "grad_norm": 0.17884354293346405, "learning_rate": 2e-07, "loss": 0.0418, "step": 1133 }, { "clip_ratio/high_max": 0.0018405324954073876, "clip_ratio/high_mean": 0.0007919335039332509, "clip_ratio/low_mean": 0.000879623978107702, "clip_ratio/low_min": 8.656440422782907e-05, "clip_ratio/region_mean": 0.0016715575038688257, "epoch": 0.1058397354006615, "grad_norm": 1.292534351348877, "learning_rate": 2e-07, "loss": 0.0464, "step": 1134 }, { "clip_ratio/high_max": 0.0018374226237938274, "clip_ratio/high_mean": 0.000749734355849796, "clip_ratio/low_mean": 0.0007330738135351567, "clip_ratio/low_min": 3.488685888441978e-05, "clip_ratio/region_mean": 0.0014828081511950586, "epoch": 0.10593306850066209, "grad_norm": 0.17767179012298584, "learning_rate": 2e-07, "loss": 0.0334, "step": 1135 }, { "clip_ratio/high_max": 0.0016330299913533963, "clip_ratio/high_mean": 0.0006284384216996841, "clip_ratio/low_mean": 0.0008460454155283514, "clip_ratio/low_min": 0.00015475264990527648, "clip_ratio/region_mean": 0.0014744838554179296, "epoch": 0.10602640160066266, "grad_norm": 0.32256028056144714, "learning_rate": 2e-07, "loss": 0.0845, "step": 1136 }, { "clip_ratio/high_max": 0.0019067546782025602, "clip_ratio/high_mean": 0.0008688514717505313, "clip_ratio/low_mean": 0.0007212979035102762, "clip_ratio/low_min": 1.437442460883176e-05, "clip_ratio/region_mean": 0.0015901494116405956, "epoch": 0.10611973470066324, "grad_norm": 0.18215949833393097, "learning_rate": 2e-07, "loss": -0.0118, "step": 1137 }, { "clip_ratio/high_max": 0.0022245179570745677, "clip_ratio/high_mean": 0.000872844597324729, "clip_ratio/low_mean": 0.0007488478386221686, "clip_ratio/low_min": 6.22961861154181e-05, "clip_ratio/region_mean": 0.0016216924486798234, "epoch": 0.10621306780066384, "grad_norm": 0.17662769556045532, "learning_rate": 2e-07, "loss": 0.0039, "step": 1138 }, { "clip_ratio/high_max": 0.0016332259547198191, "clip_ratio/high_mean": 0.0006142159618320875, "clip_ratio/low_mean": 0.0007663615488127107, "clip_ratio/low_min": 5.744968984799925e-05, "clip_ratio/region_mean": 0.0013805774942738935, "epoch": 0.10630640090066441, "grad_norm": 0.21307222545146942, "learning_rate": 2e-07, "loss": 0.0672, "step": 1139 }, { "clip_ratio/high_max": 0.0020056764369655866, "clip_ratio/high_mean": 0.0007443730701197637, "clip_ratio/low_mean": 0.0007626226670254255, "clip_ratio/low_min": 1.3833554476150312e-05, "clip_ratio/region_mean": 0.0015069957262312528, "epoch": 0.10639973400066499, "grad_norm": 0.2009405791759491, "learning_rate": 2e-07, "loss": 0.058, "step": 1140 }, { "clip_ratio/high_max": 0.0018943691247841343, "clip_ratio/high_mean": 0.000753516324039083, "clip_ratio/low_mean": 0.0007014564471319318, "clip_ratio/low_min": 4.9380751079297625e-05, "clip_ratio/region_mean": 0.001454972749343142, "epoch": 0.10649306710066558, "grad_norm": 0.20019090175628662, "learning_rate": 2e-07, "loss": 0.0113, "step": 1141 }, { "clip_ratio/high_max": 0.0021072390154586174, "clip_ratio/high_mean": 0.0007974841118993936, "clip_ratio/low_mean": 0.0008537994108337443, "clip_ratio/low_min": 4.875806735071819e-05, "clip_ratio/region_mean": 0.0016512835427420214, "epoch": 0.10658640020066616, "grad_norm": 0.1795814484357834, "learning_rate": 2e-07, "loss": 0.0401, "step": 1142 }, { "clip_ratio/high_max": 0.001778037676558597, "clip_ratio/high_mean": 0.0007957225243444555, "clip_ratio/low_mean": 0.000743653788958909, "clip_ratio/low_min": 6.830601250840118e-06, "clip_ratio/region_mean": 0.0015393763242173009, "epoch": 0.10667973330066675, "grad_norm": 0.20919446647167206, "learning_rate": 2e-07, "loss": 0.0513, "step": 1143 }, { "clip_ratio/high_max": 0.0016695245467417408, "clip_ratio/high_mean": 0.0005807170582556864, "clip_ratio/low_mean": 0.0008029220389289549, "clip_ratio/low_min": 3.820877100224607e-05, "clip_ratio/region_mean": 0.0013836390789947473, "epoch": 0.10677306640066733, "grad_norm": 0.23670423030853271, "learning_rate": 2e-07, "loss": 0.0718, "step": 1144 }, { "clip_ratio/high_max": 0.0015789384251547744, "clip_ratio/high_mean": 0.0006654261649146065, "clip_ratio/low_mean": 0.0007718578090134542, "clip_ratio/low_min": 6.615464008064009e-05, "clip_ratio/region_mean": 0.0014372839832503814, "epoch": 0.10686639950066791, "grad_norm": 2.1207847595214844, "learning_rate": 2e-07, "loss": 0.0607, "step": 1145 }, { "clip_ratio/high_max": 0.0017458004149375483, "clip_ratio/high_mean": 0.0007489466952392831, "clip_ratio/low_mean": 0.0008803221035122988, "clip_ratio/low_min": 9.561286242387723e-05, "clip_ratio/region_mean": 0.0016292688051180448, "epoch": 0.1069597326006685, "grad_norm": 0.23434318602085114, "learning_rate": 2e-07, "loss": 0.0441, "step": 1146 }, { "clip_ratio/high_max": 0.0018621850995259592, "clip_ratio/high_mean": 0.0007752439939849864, "clip_ratio/low_mean": 0.0009569797439326067, "clip_ratio/low_min": 4.49408098575077e-05, "clip_ratio/region_mean": 0.0017322237472399138, "epoch": 0.10705306570066908, "grad_norm": 0.23642611503601074, "learning_rate": 2e-07, "loss": 0.0198, "step": 1147 }, { "clip_ratio/high_max": 0.002140612225048244, "clip_ratio/high_mean": 0.0007878617288952228, "clip_ratio/low_mean": 0.0008089639450190589, "clip_ratio/low_min": 0.00010770332119136583, "clip_ratio/region_mean": 0.0015968256630003452, "epoch": 0.10714639880066966, "grad_norm": 0.2382020354270935, "learning_rate": 2e-07, "loss": 0.0354, "step": 1148 }, { "clip_ratio/high_max": 0.0017905512431752868, "clip_ratio/high_mean": 0.0007838075089239283, "clip_ratio/low_mean": 0.0008416560613113688, "clip_ratio/low_min": 3.822678354481468e-05, "clip_ratio/region_mean": 0.0016254635847872123, "epoch": 0.10723973190067025, "grad_norm": 0.22861531376838684, "learning_rate": 2e-07, "loss": 0.0223, "step": 1149 }, { "clip_ratio/high_max": 0.001998607265704777, "clip_ratio/high_mean": 0.0007658508702661493, "clip_ratio/low_mean": 0.0008628988343843957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016287497164739762, "epoch": 0.10733306500067083, "grad_norm": 0.18002702295780182, "learning_rate": 2e-07, "loss": 0.0025, "step": 1150 }, { "clip_ratio/high_max": 0.0020809012348763645, "clip_ratio/high_mean": 0.0008540978724340675, "clip_ratio/low_mean": 0.0008565541411371669, "clip_ratio/low_min": 0.00014070615179662127, "clip_ratio/region_mean": 0.0017106519771914463, "epoch": 0.10742639810067142, "grad_norm": 0.2044249027967453, "learning_rate": 2e-07, "loss": 0.0213, "step": 1151 }, { "clip_ratio/high_max": 0.0023322766355704516, "clip_ratio/high_mean": 0.000940882522627362, "clip_ratio/low_mean": 0.0008237906986323651, "clip_ratio/low_min": 0.00010056074552267091, "clip_ratio/region_mean": 0.0017646731830609497, "epoch": 0.107519731200672, "grad_norm": 0.7507470846176147, "learning_rate": 2e-07, "loss": -0.0019, "step": 1152 }, { "clip_ratio/high_max": 0.0016837600378494244, "clip_ratio/high_mean": 0.0005544483556150226, "clip_ratio/low_mean": 0.0006055840676708613, "clip_ratio/low_min": 3.713499245350249e-05, "clip_ratio/region_mean": 0.001160032419647905, "completions/clipped_ratio": 0.018476213727678603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 642.605224609375, "completions/mean_terminated_length": 577.598388671875, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.10761306430067258, "grad_norm": 0.16128115355968475, "learning_rate": 2e-07, "loss": 0.0171, "num_tokens": 858955881.0, "reward": 0.5998186469078064, "reward_std": 0.1697637140750885, "rewards/simpleverify_reward/mean": 0.5998186469078064, "rewards/simpleverify_reward/std": 0.4899369478225708, "step": 1153 }, { "clip_ratio/high_max": 0.0017052963194146287, "clip_ratio/high_mean": 0.0006739360160281649, "clip_ratio/low_mean": 0.0006507485704787541, "clip_ratio/low_min": 6.943672815395985e-06, "clip_ratio/region_mean": 0.0013246845992398448, "epoch": 0.10770639740067317, "grad_norm": 0.1607077717781067, "learning_rate": 2e-07, "loss": 0.0289, "step": 1154 }, { "clip_ratio/high_max": 0.001727568132992019, "clip_ratio/high_mean": 0.0006293631804510369, "clip_ratio/low_mean": 0.0005630431915051304, "clip_ratio/low_min": 4.688851549872197e-05, "clip_ratio/region_mean": 0.0011924063765036408, "epoch": 0.10779973050067375, "grad_norm": 0.17281484603881836, "learning_rate": 2e-07, "loss": -0.0029, "step": 1155 }, { "clip_ratio/high_max": 0.0017013440738082863, "clip_ratio/high_mean": 0.0005897964019823121, "clip_ratio/low_mean": 0.0006868044674774865, "clip_ratio/low_min": 2.829975164786447e-05, "clip_ratio/region_mean": 0.0012766008549078833, "epoch": 0.10789306360067433, "grad_norm": 0.17459584772586823, "learning_rate": 2e-07, "loss": 0.0297, "step": 1156 }, { "clip_ratio/high_max": 0.002040089759248076, "clip_ratio/high_mean": 0.0007473067671526223, "clip_ratio/low_mean": 0.0005943110681982944, "clip_ratio/low_min": 1.3760458386968821e-05, "clip_ratio/region_mean": 0.001341617858997779, "epoch": 0.10798639670067492, "grad_norm": 0.16108620166778564, "learning_rate": 2e-07, "loss": 0.0611, "step": 1157 }, { "clip_ratio/high_max": 0.0015304409917007433, "clip_ratio/high_mean": 0.0005702020944227115, "clip_ratio/low_mean": 0.000645092484774068, "clip_ratio/low_min": 1.9607843569247052e-05, "clip_ratio/region_mean": 0.0012152946001151577, "epoch": 0.1080797298006755, "grad_norm": 0.16884584724903107, "learning_rate": 2e-07, "loss": 0.0399, "step": 1158 }, { "clip_ratio/high_max": 0.00151808327427716, "clip_ratio/high_mean": 0.0006391834576788824, "clip_ratio/low_mean": 0.0005465176336656441, "clip_ratio/low_min": 3.083862702624174e-05, "clip_ratio/region_mean": 0.0011857011159008835, "epoch": 0.10817306290067608, "grad_norm": 0.16362978518009186, "learning_rate": 2e-07, "loss": 0.0376, "step": 1159 }, { "clip_ratio/high_max": 0.0014961128654249478, "clip_ratio/high_mean": 0.0006387025296135107, "clip_ratio/low_mean": 0.0005922444777297642, "clip_ratio/low_min": 3.983666556450771e-05, "clip_ratio/region_mean": 0.0012309470075706486, "epoch": 0.10826639600067667, "grad_norm": 0.15502344071865082, "learning_rate": 2e-07, "loss": 0.0552, "step": 1160 }, { "clip_ratio/high_max": 0.0017032809009833727, "clip_ratio/high_mean": 0.0006181210210343124, "clip_ratio/low_mean": 0.0005751268481617444, "clip_ratio/low_min": 6.111742732173298e-05, "clip_ratio/region_mean": 0.0011932478919334244, "epoch": 0.10835972910067725, "grad_norm": 0.1718013435602188, "learning_rate": 2e-07, "loss": 0.0179, "step": 1161 }, { "clip_ratio/high_max": 0.0021555940620601177, "clip_ratio/high_mean": 0.0008054333811742254, "clip_ratio/low_mean": 0.0005906603328185156, "clip_ratio/low_min": 2.378686986048706e-05, "clip_ratio/region_mean": 0.0013960936848889105, "epoch": 0.10845306220067784, "grad_norm": 0.16806522011756897, "learning_rate": 2e-07, "loss": -0.0128, "step": 1162 }, { "clip_ratio/high_max": 0.0017676826282695401, "clip_ratio/high_mean": 0.0006460875174525427, "clip_ratio/low_mean": 0.0005763011640738114, "clip_ratio/low_min": 2.0315294023021124e-05, "clip_ratio/region_mean": 0.0012223886951687746, "epoch": 0.10854639530067842, "grad_norm": 0.16095001995563507, "learning_rate": 2e-07, "loss": 0.0304, "step": 1163 }, { "clip_ratio/high_max": 0.0018025333411060274, "clip_ratio/high_mean": 0.0006848933817309444, "clip_ratio/low_mean": 0.0006499745759356301, "clip_ratio/low_min": 1.7717023638397222e-05, "clip_ratio/region_mean": 0.0013348679785849527, "epoch": 0.108639728400679, "grad_norm": 0.17194600403308868, "learning_rate": 2e-07, "loss": 0.0482, "step": 1164 }, { "clip_ratio/high_max": 0.0014330134690681007, "clip_ratio/high_mean": 0.0006012221829223563, "clip_ratio/low_mean": 0.0005633944674627855, "clip_ratio/low_min": 1.573514600750059e-05, "clip_ratio/region_mean": 0.0011646166603895836, "epoch": 0.10873306150067959, "grad_norm": 0.14470773935317993, "learning_rate": 2e-07, "loss": 0.0547, "step": 1165 }, { "clip_ratio/high_max": 0.0015430625735461945, "clip_ratio/high_mean": 0.0005495188711392984, "clip_ratio/low_mean": 0.0006298827174759936, "clip_ratio/low_min": 1.441420681658201e-05, "clip_ratio/region_mean": 0.0011794015736086294, "epoch": 0.10882639460068017, "grad_norm": 0.16102689504623413, "learning_rate": 2e-07, "loss": 0.0674, "step": 1166 }, { "clip_ratio/high_max": 0.0017442243406549096, "clip_ratio/high_mean": 0.0006882075977046043, "clip_ratio/low_mean": 0.0006769624342268799, "clip_ratio/low_min": 4.554686984192813e-05, "clip_ratio/region_mean": 0.0013651700282935053, "epoch": 0.10891972770068074, "grad_norm": 0.2630913555622101, "learning_rate": 2e-07, "loss": 0.0476, "step": 1167 }, { "clip_ratio/high_max": 0.0012083029923815047, "clip_ratio/high_mean": 0.0005193826086724584, "clip_ratio/low_mean": 0.0006306533705355832, "clip_ratio/low_min": 2.6927695216727443e-05, "clip_ratio/region_mean": 0.0011500360087666195, "epoch": 0.10901306080068134, "grad_norm": 0.1636441946029663, "learning_rate": 2e-07, "loss": 0.0669, "step": 1168 }, { "clip_ratio/high_max": 0.001655586460401537, "clip_ratio/high_mean": 0.0006298368498391937, "clip_ratio/low_mean": 0.0006347403050313005, "clip_ratio/low_min": 4.4347297261992935e-05, "clip_ratio/region_mean": 0.001264577171241399, "epoch": 0.10910639390068191, "grad_norm": 0.17912589013576508, "learning_rate": 2e-07, "loss": 0.0482, "step": 1169 }, { "clip_ratio/high_max": 0.0019418290321482345, "clip_ratio/high_mean": 0.0006886320006742608, "clip_ratio/low_mean": 0.0005391031136241509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012277351288503269, "epoch": 0.10919972700068249, "grad_norm": 0.4946267902851105, "learning_rate": 2e-07, "loss": 0.0226, "step": 1170 }, { "clip_ratio/high_max": 0.0020375708409119397, "clip_ratio/high_mean": 0.0006965673183003673, "clip_ratio/low_mean": 0.0006151668212623917, "clip_ratio/low_min": 3.526189721014816e-05, "clip_ratio/region_mean": 0.001311734136834275, "epoch": 0.10929306010068308, "grad_norm": 0.19428250193595886, "learning_rate": 2e-07, "loss": 0.034, "step": 1171 }, { "clip_ratio/high_max": 0.002049203059868887, "clip_ratio/high_mean": 0.0007376129869953729, "clip_ratio/low_mean": 0.0005875803271919722, "clip_ratio/low_min": 2.4418832254013978e-05, "clip_ratio/region_mean": 0.0013251933341962285, "epoch": 0.10938639320068366, "grad_norm": 0.16605006158351898, "learning_rate": 2e-07, "loss": -0.0002, "step": 1172 }, { "clip_ratio/high_max": 0.001794505755242426, "clip_ratio/high_mean": 0.0006250186506804312, "clip_ratio/low_mean": 0.0006042684271960752, "clip_ratio/low_min": 3.1561217838316225e-05, "clip_ratio/region_mean": 0.0012292870778765064, "epoch": 0.10947972630068425, "grad_norm": 0.16088369488716125, "learning_rate": 2e-07, "loss": 0.0336, "step": 1173 }, { "clip_ratio/high_max": 0.0018000064446823671, "clip_ratio/high_mean": 0.0006584975817531813, "clip_ratio/low_mean": 0.0006200137104315218, "clip_ratio/low_min": 1.1975474080827553e-05, "clip_ratio/region_mean": 0.001278511292184703, "epoch": 0.10957305940068483, "grad_norm": 0.15880903601646423, "learning_rate": 2e-07, "loss": 0.0109, "step": 1174 }, { "clip_ratio/high_max": 0.0017282588596572168, "clip_ratio/high_mean": 0.0006558646782650612, "clip_ratio/low_mean": 0.0006385556498571532, "clip_ratio/low_min": 4.382521547086071e-05, "clip_ratio/region_mean": 0.0012944203481310979, "epoch": 0.10966639250068541, "grad_norm": 0.18661294877529144, "learning_rate": 2e-07, "loss": -0.0031, "step": 1175 }, { "clip_ratio/high_max": 0.0016434299213869963, "clip_ratio/high_mean": 0.0006188478837430011, "clip_ratio/low_mean": 0.0007024453916528728, "clip_ratio/low_min": 3.1859116461419035e-05, "clip_ratio/region_mean": 0.0013212932717578951, "epoch": 0.109759725600686, "grad_norm": 0.1554040163755417, "learning_rate": 2e-07, "loss": 0.0004, "step": 1176 }, { "clip_ratio/high_max": 0.0016033118718041806, "clip_ratio/high_mean": 0.000590535453738994, "clip_ratio/low_mean": 0.0004960412416039617, "clip_ratio/low_min": 2.9591115890070796e-05, "clip_ratio/region_mean": 0.0010865766926144715, "epoch": 0.10985305870068658, "grad_norm": 0.1685456931591034, "learning_rate": 2e-07, "loss": 0.035, "step": 1177 }, { "clip_ratio/high_max": 0.0016119919200718869, "clip_ratio/high_mean": 0.0006994336872594431, "clip_ratio/low_mean": 0.0005080176961200777, "clip_ratio/low_min": 2.6680896553443745e-05, "clip_ratio/region_mean": 0.0012074513906554785, "epoch": 0.10994639180068716, "grad_norm": 0.1896088868379593, "learning_rate": 2e-07, "loss": 0.0153, "step": 1178 }, { "clip_ratio/high_max": 0.0016023181597120129, "clip_ratio/high_mean": 0.0006312606274150312, "clip_ratio/low_mean": 0.0006902899294800591, "clip_ratio/low_min": 2.462828706484288e-05, "clip_ratio/region_mean": 0.0013215505459811538, "epoch": 0.11003972490068775, "grad_norm": 0.1891086995601654, "learning_rate": 2e-07, "loss": 0.0219, "step": 1179 }, { "clip_ratio/high_max": 0.0012476910233090166, "clip_ratio/high_mean": 0.0004934763674100395, "clip_ratio/low_mean": 0.0006635324207309168, "clip_ratio/low_min": 4.6747450141992886e-05, "clip_ratio/region_mean": 0.001157008795416914, "epoch": 0.11013305800068833, "grad_norm": 0.17143650352954865, "learning_rate": 2e-07, "loss": 0.0636, "step": 1180 }, { "clip_ratio/high_max": 0.0017113645517383702, "clip_ratio/high_mean": 0.0006184773574204883, "clip_ratio/low_mean": 0.00048548767426837003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011039650380553212, "epoch": 0.11022639110068891, "grad_norm": 0.14065149426460266, "learning_rate": 2e-07, "loss": 0.0143, "step": 1181 }, { "clip_ratio/high_max": 0.0018538670265115798, "clip_ratio/high_mean": 0.0006766871865693247, "clip_ratio/low_mean": 0.0005326104687810584, "clip_ratio/low_min": 2.8964404918951914e-05, "clip_ratio/region_mean": 0.0012092976612620987, "epoch": 0.1103197242006895, "grad_norm": 0.16656944155693054, "learning_rate": 2e-07, "loss": 0.0152, "step": 1182 }, { "clip_ratio/high_max": 0.0014464103005593643, "clip_ratio/high_mean": 0.0005811582796013681, "clip_ratio/low_mean": 0.0005964988995401654, "clip_ratio/low_min": 5.2509241868392564e-05, "clip_ratio/region_mean": 0.0011776571664086077, "epoch": 0.11041305730069008, "grad_norm": 0.27208787202835083, "learning_rate": 2e-07, "loss": 0.0387, "step": 1183 }, { "clip_ratio/high_max": 0.001598800945430412, "clip_ratio/high_mean": 0.0006337869217531988, "clip_ratio/low_mean": 0.0005574716051341966, "clip_ratio/low_min": 1.708350464468822e-05, "clip_ratio/region_mean": 0.0011912585032405332, "epoch": 0.11050639040069067, "grad_norm": 0.18974028527736664, "learning_rate": 2e-07, "loss": 0.0206, "step": 1184 }, { "clip_ratio/high_max": 0.001642827779505751, "clip_ratio/high_mean": 0.0006317607490018418, "clip_ratio/low_mean": 0.0006463834033638705, "clip_ratio/low_min": 1.3194005077821203e-05, "clip_ratio/region_mean": 0.0012781441619154066, "epoch": 0.11059972350069125, "grad_norm": 0.22505322098731995, "learning_rate": 2e-07, "loss": 0.0435, "step": 1185 }, { "clip_ratio/high_max": 0.0018842271601897664, "clip_ratio/high_mean": 0.0007860207988414913, "clip_ratio/low_mean": 0.0006362721469486132, "clip_ratio/low_min": 4.6767963794991374e-05, "clip_ratio/region_mean": 0.0014222929603420198, "epoch": 0.11069305660069183, "grad_norm": 0.1873571276664734, "learning_rate": 2e-07, "loss": 0.0016, "step": 1186 }, { "clip_ratio/high_max": 0.0017817896768974606, "clip_ratio/high_mean": 0.0007156403808039613, "clip_ratio/low_mean": 0.0006722234193148324, "clip_ratio/low_min": 1.7146776372101158e-05, "clip_ratio/region_mean": 0.0013878637728339527, "epoch": 0.11078638970069242, "grad_norm": 0.16899481415748596, "learning_rate": 2e-07, "loss": 0.0343, "step": 1187 }, { "clip_ratio/high_max": 0.0016362050700990949, "clip_ratio/high_mean": 0.0006184279154695105, "clip_ratio/low_mean": 0.0007243590771395247, "clip_ratio/low_min": 1.903746488096658e-05, "clip_ratio/region_mean": 0.0013427869816950988, "epoch": 0.110879722800693, "grad_norm": 0.15989556908607483, "learning_rate": 2e-07, "loss": 0.0449, "step": 1188 }, { "clip_ratio/high_max": 0.00199316243379144, "clip_ratio/high_mean": 0.0006864378592581488, "clip_ratio/low_mean": 0.0006274419579312962, "clip_ratio/low_min": 6.376243982231244e-05, "clip_ratio/region_mean": 0.0013138798349245917, "epoch": 0.11097305590069358, "grad_norm": 0.4591924250125885, "learning_rate": 2e-07, "loss": 0.0635, "step": 1189 }, { "clip_ratio/high_max": 0.001966142121091252, "clip_ratio/high_mean": 0.0007865260558901355, "clip_ratio/low_mean": 0.0006146843179521966, "clip_ratio/low_min": 1.2047031304973643e-05, "clip_ratio/region_mean": 0.0014012104002176784, "epoch": 0.11106638900069417, "grad_norm": 0.21491265296936035, "learning_rate": 2e-07, "loss": 0.0188, "step": 1190 }, { "clip_ratio/high_max": 0.0017275051977776457, "clip_ratio/high_mean": 0.0006209878674781066, "clip_ratio/low_mean": 0.0006014812997818808, "clip_ratio/low_min": 1.3422098163573537e-05, "clip_ratio/region_mean": 0.001222469156346051, "epoch": 0.11115972210069475, "grad_norm": 0.14565059542655945, "learning_rate": 2e-07, "loss": 0.0311, "step": 1191 }, { "clip_ratio/high_max": 0.0018550388158473652, "clip_ratio/high_mean": 0.0007006270479905652, "clip_ratio/low_mean": 0.0006284281298576389, "clip_ratio/low_min": 9.268805297324434e-05, "clip_ratio/region_mean": 0.0013290551833051722, "epoch": 0.11125305520069532, "grad_norm": 0.5749951004981995, "learning_rate": 2e-07, "loss": 0.0004, "step": 1192 }, { "clip_ratio/high_max": 0.0016502425969520118, "clip_ratio/high_mean": 0.0006437225802073954, "clip_ratio/low_mean": 0.0006101977069192799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012539203198684845, "epoch": 0.11134638830069592, "grad_norm": 0.18215207755565643, "learning_rate": 2e-07, "loss": -0.0004, "step": 1193 }, { "clip_ratio/high_max": 0.0017368491680826992, "clip_ratio/high_mean": 0.0006446158386097522, "clip_ratio/low_mean": 0.0006619600208068732, "clip_ratio/low_min": 3.10600134980632e-05, "clip_ratio/region_mean": 0.0013065758103039116, "epoch": 0.1114397214006965, "grad_norm": 0.16845083236694336, "learning_rate": 2e-07, "loss": 0.0352, "step": 1194 }, { "clip_ratio/high_max": 0.0018229290908493567, "clip_ratio/high_mean": 0.0006434263132177875, "clip_ratio/low_mean": 0.0005951566126896068, "clip_ratio/low_min": 1.622955096536316e-05, "clip_ratio/region_mean": 0.0012385829504637513, "epoch": 0.11153305450069709, "grad_norm": 0.16113053262233734, "learning_rate": 2e-07, "loss": 0.0105, "step": 1195 }, { "clip_ratio/high_max": 0.001678079177509062, "clip_ratio/high_mean": 0.0006365074259520043, "clip_ratio/low_mean": 0.0005578919335675891, "clip_ratio/low_min": 3.087181903538294e-05, "clip_ratio/region_mean": 0.0011943993740715086, "epoch": 0.11162638760069767, "grad_norm": 0.16549836099147797, "learning_rate": 2e-07, "loss": 0.0227, "step": 1196 }, { "clip_ratio/high_max": 0.0017848812021838967, "clip_ratio/high_mean": 0.0006092470175644848, "clip_ratio/low_mean": 0.0006880839155201102, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012973309312656056, "epoch": 0.11171972070069824, "grad_norm": 0.18886782228946686, "learning_rate": 2e-07, "loss": 0.0765, "step": 1197 }, { "clip_ratio/high_max": 0.0017219117653439753, "clip_ratio/high_mean": 0.0006915204548931797, "clip_ratio/low_mean": 0.000730987159840879, "clip_ratio/low_min": 2.8415548513294198e-05, "clip_ratio/region_mean": 0.0014225076192815322, "epoch": 0.11181305380069884, "grad_norm": 0.21025022864341736, "learning_rate": 2e-07, "loss": 0.0243, "step": 1198 }, { "clip_ratio/high_max": 0.0019323795931995846, "clip_ratio/high_mean": 0.0007213194730866235, "clip_ratio/low_mean": 0.0006657858793914784, "clip_ratio/low_min": 0.00010046877105196472, "clip_ratio/region_mean": 0.001387105367030017, "epoch": 0.11190638690069941, "grad_norm": 0.18716835975646973, "learning_rate": 2e-07, "loss": 0.0293, "step": 1199 }, { "clip_ratio/high_max": 0.0017354464616801124, "clip_ratio/high_mean": 0.0007719640343566425, "clip_ratio/low_mean": 0.000552290217456175, "clip_ratio/low_min": 3.8159451833053026e-05, "clip_ratio/region_mean": 0.0013242542481748387, "epoch": 0.11199972000069999, "grad_norm": 0.1823512613773346, "learning_rate": 2e-07, "loss": -0.0054, "step": 1200 }, { "clip_ratio/high_max": 0.002123392197972862, "clip_ratio/high_mean": 0.0007648910795978736, "clip_ratio/low_mean": 0.000654587323879241, "clip_ratio/low_min": 4.306902155803982e-05, "clip_ratio/region_mean": 0.0014194783907441888, "epoch": 0.11209305310070058, "grad_norm": 0.16624779999256134, "learning_rate": 2e-07, "loss": 0.0073, "step": 1201 }, { "clip_ratio/high_max": 0.0017743766184139531, "clip_ratio/high_mean": 0.0006593586058443179, "clip_ratio/low_mean": 0.0008040177126531489, "clip_ratio/low_min": 1.5689720385125838e-05, "clip_ratio/region_mean": 0.0014633763566962443, "epoch": 0.11218638620070116, "grad_norm": 0.1570969521999359, "learning_rate": 2e-07, "loss": 0.056, "step": 1202 }, { "clip_ratio/high_max": 0.0020229958172421902, "clip_ratio/high_mean": 0.0006610530053876573, "clip_ratio/low_mean": 0.0006030327594999108, "clip_ratio/low_min": 5.476691057992866e-06, "clip_ratio/region_mean": 0.001264085734874243, "epoch": 0.11227971930070174, "grad_norm": 0.1563320904970169, "learning_rate": 2e-07, "loss": 0.0139, "step": 1203 }, { "clip_ratio/high_max": 0.0021467497608682606, "clip_ratio/high_mean": 0.0007964446558617055, "clip_ratio/low_mean": 0.0006419783203455154, "clip_ratio/low_min": 7.026428193057654e-05, "clip_ratio/region_mean": 0.0014384229725692421, "epoch": 0.11237305240070233, "grad_norm": 0.2023751139640808, "learning_rate": 2e-07, "loss": 0.0351, "step": 1204 }, { "clip_ratio/high_max": 0.0019305036003061105, "clip_ratio/high_mean": 0.0006750689281034283, "clip_ratio/low_mean": 0.0006261421331146266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013012110721319914, "epoch": 0.11246638550070291, "grad_norm": 0.1906033605337143, "learning_rate": 2e-07, "loss": -0.0271, "step": 1205 }, { "clip_ratio/high_max": 0.0017085464678530116, "clip_ratio/high_mean": 0.0005854409346284228, "clip_ratio/low_mean": 0.0007824249878467526, "clip_ratio/low_min": 0.00010180927347391844, "clip_ratio/region_mean": 0.0013678659161087126, "epoch": 0.1125597186007035, "grad_norm": 0.2597927153110504, "learning_rate": 2e-07, "loss": 0.071, "step": 1206 }, { "clip_ratio/high_max": 0.001963698934559943, "clip_ratio/high_mean": 0.0008281957561848685, "clip_ratio/low_mean": 0.0006396881535692955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001467883885197807, "epoch": 0.11265305170070408, "grad_norm": 0.19650326669216156, "learning_rate": 2e-07, "loss": -0.007, "step": 1207 }, { "clip_ratio/high_max": 0.0015268050628947094, "clip_ratio/high_mean": 0.0006416117812477751, "clip_ratio/low_mean": 0.000657591290291748, "clip_ratio/low_min": 2.282396781083662e-05, "clip_ratio/region_mean": 0.0012992030497116502, "epoch": 0.11274638480070466, "grad_norm": 0.19723127782344818, "learning_rate": 2e-07, "loss": 0.0356, "step": 1208 }, { "clip_ratio/high_max": 0.0017499780005891807, "clip_ratio/high_mean": 0.0006795383778808173, "clip_ratio/low_mean": 0.0007303121928998735, "clip_ratio/low_min": 4.5773445890517905e-05, "clip_ratio/region_mean": 0.0014098505707806908, "epoch": 0.11283971790070525, "grad_norm": 0.21628065407276154, "learning_rate": 2e-07, "loss": 0.0364, "step": 1209 }, { "clip_ratio/high_max": 0.0023353828728431836, "clip_ratio/high_mean": 0.0007169713007897371, "clip_ratio/low_mean": 0.000739826139579236, "clip_ratio/low_min": 8.280283600470284e-05, "clip_ratio/region_mean": 0.0014567974540113937, "epoch": 0.11293305100070583, "grad_norm": 0.1804220825433731, "learning_rate": 2e-07, "loss": 0.0159, "step": 1210 }, { "clip_ratio/high_max": 0.002023077664489392, "clip_ratio/high_mean": 0.0007972766070452053, "clip_ratio/low_mean": 0.0005483621807798045, "clip_ratio/low_min": 3.2717113754188176e-05, "clip_ratio/region_mean": 0.0013456387459882535, "epoch": 0.11302638410070641, "grad_norm": 0.19473648071289062, "learning_rate": 2e-07, "loss": -0.0542, "step": 1211 }, { "clip_ratio/high_max": 0.0018410850389045663, "clip_ratio/high_mean": 0.0006798792483095895, "clip_ratio/low_mean": 0.0007240676018227532, "clip_ratio/low_min": 2.9584543881355785e-05, "clip_ratio/region_mean": 0.001403946847858606, "epoch": 0.113119717200707, "grad_norm": 0.17801834642887115, "learning_rate": 2e-07, "loss": 0.0321, "step": 1212 }, { "clip_ratio/high_max": 0.0016314754975610413, "clip_ratio/high_mean": 0.0006564184877788648, "clip_ratio/low_mean": 0.0006458212155848742, "clip_ratio/low_min": 4.021795939479489e-05, "clip_ratio/region_mean": 0.0013022397106396966, "epoch": 0.11321305030070758, "grad_norm": 0.1814417839050293, "learning_rate": 2e-07, "loss": 0.052, "step": 1213 }, { "clip_ratio/high_max": 0.0017815642822824884, "clip_ratio/high_mean": 0.0006610833333979826, "clip_ratio/low_mean": 0.0006980185844440712, "clip_ratio/low_min": 4.563658148981631e-05, "clip_ratio/region_mean": 0.001359101905109128, "epoch": 0.11330638340070817, "grad_norm": 0.23626896739006042, "learning_rate": 2e-07, "loss": 0.0731, "step": 1214 }, { "clip_ratio/high_max": 0.0015849441624595784, "clip_ratio/high_mean": 0.0006159991407912457, "clip_ratio/low_mean": 0.0006856236250314396, "clip_ratio/low_min": 1.137397612183122e-05, "clip_ratio/region_mean": 0.0013016227712796535, "epoch": 0.11339971650070875, "grad_norm": 0.1933341771364212, "learning_rate": 2e-07, "loss": 0.0808, "step": 1215 }, { "clip_ratio/high_max": 0.0017681787358014844, "clip_ratio/high_mean": 0.0007517805624956964, "clip_ratio/low_mean": 0.0006485622716354555, "clip_ratio/low_min": 5.328930819814559e-05, "clip_ratio/region_mean": 0.001400342836859636, "epoch": 0.11349304960070933, "grad_norm": 0.22811280190944672, "learning_rate": 2e-07, "loss": 0.0458, "step": 1216 }, { "clip_ratio/high_max": 0.0017768497200449929, "clip_ratio/high_mean": 0.0006977675366215408, "clip_ratio/low_mean": 0.000740344501537038, "clip_ratio/low_min": 2.5202437427651603e-05, "clip_ratio/region_mean": 0.0014381120381585788, "epoch": 0.11358638270070992, "grad_norm": 0.21421846747398376, "learning_rate": 2e-07, "loss": 0.0398, "step": 1217 }, { "clip_ratio/high_max": 0.0018747972098935861, "clip_ratio/high_mean": 0.0006923552791704424, "clip_ratio/low_mean": 0.0006740733460901538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00136642867437331, "epoch": 0.1136797158007105, "grad_norm": 0.2485412061214447, "learning_rate": 2e-07, "loss": 0.0284, "step": 1218 }, { "clip_ratio/high_max": 0.0018189503971370868, "clip_ratio/high_mean": 0.0007034794780338416, "clip_ratio/low_mean": 0.000622761515842285, "clip_ratio/low_min": 2.8036334697389975e-05, "clip_ratio/region_mean": 0.0013262410211609676, "epoch": 0.11377304890071108, "grad_norm": 0.1935350000858307, "learning_rate": 2e-07, "loss": 0.0027, "step": 1219 }, { "clip_ratio/high_max": 0.0018007807702815626, "clip_ratio/high_mean": 0.0007380261595244519, "clip_ratio/low_mean": 0.0006788272421545116, "clip_ratio/low_min": 1.200999213324394e-05, "clip_ratio/region_mean": 0.001416853363480186, "epoch": 0.11386638200071167, "grad_norm": 0.19330981373786926, "learning_rate": 2e-07, "loss": 0.0168, "step": 1220 }, { "clip_ratio/high_max": 0.001946432650584029, "clip_ratio/high_mean": 0.0008022147212614072, "clip_ratio/low_mean": 0.0006667568250122713, "clip_ratio/low_min": 3.8755119931010995e-05, "clip_ratio/region_mean": 0.0014689715535496362, "epoch": 0.11395971510071225, "grad_norm": 0.18847449123859406, "learning_rate": 2e-07, "loss": 0.0076, "step": 1221 }, { "clip_ratio/high_max": 0.0016450900511699729, "clip_ratio/high_mean": 0.0006838346052973066, "clip_ratio/low_mean": 0.0007193425808509346, "clip_ratio/low_min": 4.710746361524798e-05, "clip_ratio/region_mean": 0.0014031771970621776, "epoch": 0.11405304820071283, "grad_norm": 0.27726420760154724, "learning_rate": 2e-07, "loss": 0.0122, "step": 1222 }, { "clip_ratio/high_max": 0.0021418637334136292, "clip_ratio/high_mean": 0.0007826969977031695, "clip_ratio/low_mean": 0.0006444951497996954, "clip_ratio/low_min": 1.4417531929211691e-05, "clip_ratio/region_mean": 0.001427192171831848, "epoch": 0.11414638130071342, "grad_norm": 0.27614083886146545, "learning_rate": 2e-07, "loss": -0.0068, "step": 1223 }, { "clip_ratio/high_max": 0.0015291256459022406, "clip_ratio/high_mean": 0.0006138509252195945, "clip_ratio/low_mean": 0.000735232946681208, "clip_ratio/low_min": 3.415183255128795e-05, "clip_ratio/region_mean": 0.0013490838355210144, "epoch": 0.114239714400714, "grad_norm": 0.20824365317821503, "learning_rate": 2e-07, "loss": 0.0448, "step": 1224 }, { "clip_ratio/high_max": 0.002354316762648523, "clip_ratio/high_mean": 0.0008129967191052856, "clip_ratio/low_mean": 0.0005285174229356926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00134151413658401, "epoch": 0.11433304750071459, "grad_norm": 0.17510150372982025, "learning_rate": 2e-07, "loss": -0.0365, "step": 1225 }, { "clip_ratio/high_max": 0.0019088539411313832, "clip_ratio/high_mean": 0.0007080216564645525, "clip_ratio/low_mean": 0.000813225031379261, "clip_ratio/low_min": 4.328796057961881e-05, "clip_ratio/region_mean": 0.0015212466387310997, "epoch": 0.11442638060071517, "grad_norm": 0.20113784074783325, "learning_rate": 2e-07, "loss": 0.0468, "step": 1226 }, { "clip_ratio/high_max": 0.0020745092260767706, "clip_ratio/high_mean": 0.0007107132914825343, "clip_ratio/low_mean": 0.0007181372657214524, "clip_ratio/low_min": 5.305122795107309e-05, "clip_ratio/region_mean": 0.0014288505481090397, "epoch": 0.11451971370071574, "grad_norm": 0.220927894115448, "learning_rate": 2e-07, "loss": 0.0602, "step": 1227 }, { "clip_ratio/high_max": 0.0016867046615516301, "clip_ratio/high_mean": 0.0006076824884075904, "clip_ratio/low_mean": 0.000760385706598754, "clip_ratio/low_min": 9.021905225381488e-05, "clip_ratio/region_mean": 0.0013680681877303869, "epoch": 0.11461304680071634, "grad_norm": 0.31231027841567993, "learning_rate": 2e-07, "loss": 0.0537, "step": 1228 }, { "clip_ratio/high_max": 0.0017079940444091335, "clip_ratio/high_mean": 0.0007021579367574304, "clip_ratio/low_mean": 0.0007262806084327167, "clip_ratio/low_min": 5.8474061006563716e-05, "clip_ratio/region_mean": 0.0014284385470091365, "epoch": 0.11470637990071691, "grad_norm": 0.20768561959266663, "learning_rate": 2e-07, "loss": 0.0192, "step": 1229 }, { "clip_ratio/high_max": 0.001813379631130374, "clip_ratio/high_mean": 0.0007907981821517751, "clip_ratio/low_mean": 0.0005893414017918985, "clip_ratio/low_min": 4.112740862183273e-05, "clip_ratio/region_mean": 0.0013801395652990323, "epoch": 0.1147997130007175, "grad_norm": 0.21587878465652466, "learning_rate": 2e-07, "loss": -0.0061, "step": 1230 }, { "clip_ratio/high_max": 0.001871857457445003, "clip_ratio/high_mean": 0.0007189674815890612, "clip_ratio/low_mean": 0.0007085789748089155, "clip_ratio/low_min": 4.5587553358927835e-05, "clip_ratio/region_mean": 0.001427546470949892, "epoch": 0.11489304610071809, "grad_norm": 0.26298826932907104, "learning_rate": 2e-07, "loss": -0.0019, "step": 1231 }, { "clip_ratio/high_max": 0.001995592585444683, "clip_ratio/high_mean": 0.0007535356699008844, "clip_ratio/low_mean": 0.000834016980661545, "clip_ratio/low_min": 5.5207642617460806e-05, "clip_ratio/region_mean": 0.0015875526405579876, "epoch": 0.11498637920071866, "grad_norm": 0.22382621467113495, "learning_rate": 2e-07, "loss": 0.0709, "step": 1232 }, { "clip_ratio/high_max": 0.0020703155823866837, "clip_ratio/high_mean": 0.0007313550704566296, "clip_ratio/low_mean": 0.0007691301052545896, "clip_ratio/low_min": 9.533536695016664e-05, "clip_ratio/region_mean": 0.0015004851484263781, "epoch": 0.11507971230071924, "grad_norm": 0.3064039349555969, "learning_rate": 2e-07, "loss": 0.0562, "step": 1233 }, { "clip_ratio/high_max": 0.0014994187768024858, "clip_ratio/high_mean": 0.0006033366826159181, "clip_ratio/low_mean": 0.0008681757535669021, "clip_ratio/low_min": 0.00011312712194921914, "clip_ratio/region_mean": 0.0014715124416397884, "epoch": 0.11517304540071983, "grad_norm": 0.20792166888713837, "learning_rate": 2e-07, "loss": 0.0658, "step": 1234 }, { "clip_ratio/high_max": 0.0015238021151162684, "clip_ratio/high_mean": 0.0006355091554723913, "clip_ratio/low_mean": 0.0007760861117276363, "clip_ratio/low_min": 2.3273132683243603e-05, "clip_ratio/region_mean": 0.0014115952762949746, "epoch": 0.11526637850072041, "grad_norm": 0.18855012953281403, "learning_rate": 2e-07, "loss": 0.0541, "step": 1235 }, { "clip_ratio/high_max": 0.001760505332640605, "clip_ratio/high_mean": 0.0007069724961183965, "clip_ratio/low_mean": 0.000678924696330796, "clip_ratio/low_min": 3.358627509442158e-05, "clip_ratio/region_mean": 0.0013858971615263727, "epoch": 0.115359711600721, "grad_norm": 0.24031035602092743, "learning_rate": 2e-07, "loss": 0.0435, "step": 1236 }, { "clip_ratio/high_max": 0.001999726733629359, "clip_ratio/high_mean": 0.0007284773710125592, "clip_ratio/low_mean": 0.0008245931167039089, "clip_ratio/low_min": 1.3343295904633123e-05, "clip_ratio/region_mean": 0.001553070527734235, "epoch": 0.11545304470072158, "grad_norm": 0.2353406697511673, "learning_rate": 2e-07, "loss": 0.0578, "step": 1237 }, { "clip_ratio/high_max": 0.002238269667941495, "clip_ratio/high_mean": 0.0008678373433212982, "clip_ratio/low_mean": 0.0008464150250802049, "clip_ratio/low_min": 2.5683171770651825e-05, "clip_ratio/region_mean": 0.0017142523574875668, "epoch": 0.11554637780072216, "grad_norm": 0.1996588557958603, "learning_rate": 2e-07, "loss": 0.0098, "step": 1238 }, { "clip_ratio/high_max": 0.0023110997608455364, "clip_ratio/high_mean": 0.0008199171279557049, "clip_ratio/low_mean": 0.0008518363028997555, "clip_ratio/low_min": 5.146158582647331e-05, "clip_ratio/region_mean": 0.0016717534163035452, "epoch": 0.11563971090072275, "grad_norm": 4.392289161682129, "learning_rate": 2e-07, "loss": 0.0443, "step": 1239 }, { "clip_ratio/high_max": 0.001825257679229253, "clip_ratio/high_mean": 0.0007007856834206905, "clip_ratio/low_mean": 0.000799260741132457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001500046471846872, "epoch": 0.11573304400072333, "grad_norm": 0.257712721824646, "learning_rate": 2e-07, "loss": 0.0696, "step": 1240 }, { "clip_ratio/high_max": 0.0021302935274434276, "clip_ratio/high_mean": 0.0008179210799426073, "clip_ratio/low_mean": 0.000769412579757045, "clip_ratio/low_min": 6.361843406921253e-05, "clip_ratio/region_mean": 0.0015873336815275252, "epoch": 0.11582637710072391, "grad_norm": 0.23931963741779327, "learning_rate": 2e-07, "loss": 0.02, "step": 1241 }, { "clip_ratio/high_max": 0.002196750858274754, "clip_ratio/high_mean": 0.0008153451108228182, "clip_ratio/low_mean": 0.0007461098502972163, "clip_ratio/low_min": 2.883926390495617e-05, "clip_ratio/region_mean": 0.0015614549847668968, "epoch": 0.1159197102007245, "grad_norm": 0.21287751197814941, "learning_rate": 2e-07, "loss": 0.0236, "step": 1242 }, { "clip_ratio/high_max": 0.0019600256928242743, "clip_ratio/high_mean": 0.0007343806810240494, "clip_ratio/low_mean": 0.0009392539177497383, "clip_ratio/low_min": 3.688192373374477e-05, "clip_ratio/region_mean": 0.00167363462242065, "epoch": 0.11601304330072508, "grad_norm": 0.2388520985841751, "learning_rate": 2e-07, "loss": 0.0665, "step": 1243 }, { "clip_ratio/high_max": 0.002049018112302292, "clip_ratio/high_mean": 0.0007977996792760678, "clip_ratio/low_mean": 0.0009095584573515225, "clip_ratio/low_min": 5.842214795848122e-05, "clip_ratio/region_mean": 0.0017073581329896115, "epoch": 0.11610637640072566, "grad_norm": 0.24122431874275208, "learning_rate": 2e-07, "loss": 0.0196, "step": 1244 }, { "clip_ratio/high_max": 0.002036123725702055, "clip_ratio/high_mean": 0.0008964891894720495, "clip_ratio/low_mean": 0.0007354551507887663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016319443384418264, "epoch": 0.11619970950072625, "grad_norm": 0.20469529926776886, "learning_rate": 2e-07, "loss": -0.0257, "step": 1245 }, { "clip_ratio/high_max": 0.0019104303100903053, "clip_ratio/high_mean": 0.0006784113566027372, "clip_ratio/low_mean": 0.0008821239989629248, "clip_ratio/low_min": 7.028521940810606e-05, "clip_ratio/region_mean": 0.0015605353546561673, "epoch": 0.11629304260072683, "grad_norm": 0.22279538214206696, "learning_rate": 2e-07, "loss": 0.0453, "step": 1246 }, { "clip_ratio/high_max": 0.002346692497667391, "clip_ratio/high_mean": 0.0009934938534570392, "clip_ratio/low_mean": 0.0007314088952625752, "clip_ratio/low_min": 7.161081703088712e-05, "clip_ratio/region_mean": 0.001724902780551929, "epoch": 0.11638637570072742, "grad_norm": 10.260732650756836, "learning_rate": 2e-07, "loss": -0.0227, "step": 1247 }, { "clip_ratio/high_max": 0.0020246249841875397, "clip_ratio/high_mean": 0.000756393466872396, "clip_ratio/low_mean": 0.0008872840517142322, "clip_ratio/low_min": 2.3005634830042254e-05, "clip_ratio/region_mean": 0.0016436775185866281, "epoch": 0.116479708800728, "grad_norm": 0.2727806866168976, "learning_rate": 2e-07, "loss": 0.045, "step": 1248 }, { "clip_ratio/high_max": 0.0019652216797112487, "clip_ratio/high_mean": 0.0008002615377336042, "clip_ratio/low_mean": 0.0007939530096336966, "clip_ratio/low_min": 4.6326510528160725e-05, "clip_ratio/region_mean": 0.0015942145910230465, "epoch": 0.11657304190072858, "grad_norm": 0.25504693388938904, "learning_rate": 2e-07, "loss": 0.0093, "step": 1249 }, { "clip_ratio/high_max": 0.002215296346548712, "clip_ratio/high_mean": 0.0007826925266272156, "clip_ratio/low_mean": 0.000982078752713278, "clip_ratio/low_min": 6.717219548590947e-05, "clip_ratio/region_mean": 0.0017647712738835253, "epoch": 0.11666637500072917, "grad_norm": 0.2266179621219635, "learning_rate": 2e-07, "loss": 0.0369, "step": 1250 }, { "clip_ratio/high_max": 0.0019647805002023233, "clip_ratio/high_mean": 0.0007041130820653052, "clip_ratio/low_mean": 0.0007562920618511271, "clip_ratio/low_min": 5.909408537263516e-05, "clip_ratio/region_mean": 0.0014604051248170435, "epoch": 0.11675970810072975, "grad_norm": 0.29295626282691956, "learning_rate": 2e-07, "loss": 0.0724, "step": 1251 }, { "clip_ratio/high_max": 0.0020743414024764206, "clip_ratio/high_mean": 0.0008697340817889199, "clip_ratio/low_mean": 0.0010383620683569461, "clip_ratio/low_min": 5.647818124998594e-05, "clip_ratio/region_mean": 0.0019080961283179931, "epoch": 0.11685304120073033, "grad_norm": 12.287240028381348, "learning_rate": 2e-07, "loss": 0.0506, "step": 1252 }, { "clip_ratio/high_max": 0.002025371439231094, "clip_ratio/high_mean": 0.0007783571127220057, "clip_ratio/low_mean": 0.0009971768995455932, "clip_ratio/low_min": 1.2964115740032867e-05, "clip_ratio/region_mean": 0.0017755339868017472, "epoch": 0.11694637430073092, "grad_norm": 0.2508259117603302, "learning_rate": 2e-07, "loss": 0.0477, "step": 1253 }, { "clip_ratio/high_max": 0.002316706650162814, "clip_ratio/high_mean": 0.0008681285271450179, "clip_ratio/low_mean": 0.0009755221362865996, "clip_ratio/low_min": 3.444724097789731e-05, "clip_ratio/region_mean": 0.0018436506943544373, "epoch": 0.1170397074007315, "grad_norm": 0.26576414704322815, "learning_rate": 2e-07, "loss": 0.0055, "step": 1254 }, { "clip_ratio/high_max": 0.0018528344735386781, "clip_ratio/high_mean": 0.0008110902126645669, "clip_ratio/low_mean": 0.0009734508494148031, "clip_ratio/low_min": 6.097308823882486e-05, "clip_ratio/region_mean": 0.0017845410402514972, "epoch": 0.11713304050073207, "grad_norm": 0.2170669138431549, "learning_rate": 2e-07, "loss": 0.0319, "step": 1255 }, { "clip_ratio/high_max": 0.002197306130256038, "clip_ratio/high_mean": 0.0009107797322940314, "clip_ratio/low_mean": 0.000995673155557597, "clip_ratio/low_min": 7.87351109465817e-05, "clip_ratio/region_mean": 0.0019064528969465755, "epoch": 0.11722637360073267, "grad_norm": 0.24046719074249268, "learning_rate": 2e-07, "loss": -0.0069, "step": 1256 }, { "clip_ratio/high_max": 0.0022432658151956275, "clip_ratio/high_mean": 0.0007830854065105086, "clip_ratio/low_mean": 0.00100970386301924, "clip_ratio/low_min": 1.2016919754387345e-05, "clip_ratio/region_mean": 0.0017927893168234732, "epoch": 0.11731970670073325, "grad_norm": 0.9291532635688782, "learning_rate": 2e-07, "loss": 0.044, "step": 1257 }, { "clip_ratio/high_max": 0.002338208898436278, "clip_ratio/high_mean": 0.0009733000224514399, "clip_ratio/low_mean": 0.001056001736287726, "clip_ratio/low_min": 5.24069182574749e-05, "clip_ratio/region_mean": 0.002029301795118954, "epoch": 0.11741303980073384, "grad_norm": 0.3018883466720581, "learning_rate": 2e-07, "loss": 0.0546, "step": 1258 }, { "clip_ratio/high_max": 0.002101969403156545, "clip_ratio/high_mean": 0.0008679473103256896, "clip_ratio/low_mean": 0.0010254984863422578, "clip_ratio/low_min": 6.449223383242497e-05, "clip_ratio/region_mean": 0.0018934457802970428, "epoch": 0.11750637290073442, "grad_norm": 0.24048177897930145, "learning_rate": 2e-07, "loss": 0.0151, "step": 1259 }, { "clip_ratio/high_max": 0.0021233339502941817, "clip_ratio/high_mean": 0.0008342386627191445, "clip_ratio/low_mean": 0.001055306072885287, "clip_ratio/low_min": 6.914673076607869e-05, "clip_ratio/region_mean": 0.0018895446992246434, "epoch": 0.117599706000735, "grad_norm": 0.3301331698894501, "learning_rate": 2e-07, "loss": 0.0336, "step": 1260 }, { "clip_ratio/high_max": 0.0021031348514952697, "clip_ratio/high_mean": 0.0008741130095586414, "clip_ratio/low_mean": 0.0011592530045163585, "clip_ratio/low_min": 5.5464362958446145e-05, "clip_ratio/region_mean": 0.002033366014075, "epoch": 0.11769303910073559, "grad_norm": 0.3050091564655304, "learning_rate": 2e-07, "loss": 0.0383, "step": 1261 }, { "clip_ratio/high_max": 0.002194752509240061, "clip_ratio/high_mean": 0.0008979595404525753, "clip_ratio/low_mean": 0.001517905060609337, "clip_ratio/low_min": 0.00020412803496583365, "clip_ratio/region_mean": 0.00241586454649223, "epoch": 0.11778637220073616, "grad_norm": 0.34791383147239685, "learning_rate": 2e-07, "loss": 0.0743, "step": 1262 }, { "clip_ratio/high_max": 0.0024040390708250925, "clip_ratio/high_mean": 0.0009456051338929683, "clip_ratio/low_mean": 0.0011774161066568922, "clip_ratio/low_min": 7.642715718247928e-05, "clip_ratio/region_mean": 0.002123021251463797, "epoch": 0.11787970530073674, "grad_norm": 0.27156931161880493, "learning_rate": 2e-07, "loss": 0.015, "step": 1263 }, { "clip_ratio/high_max": 0.0021734513211413287, "clip_ratio/high_mean": 0.0009090211297007045, "clip_ratio/low_mean": 0.001424410096660722, "clip_ratio/low_min": 9.035143466462614e-05, "clip_ratio/region_mean": 0.002333431286388077, "epoch": 0.11797303840073733, "grad_norm": 0.3664061427116394, "learning_rate": 2e-07, "loss": 0.0807, "step": 1264 }, { "clip_ratio/high_max": 0.001962700673175277, "clip_ratio/high_mean": 0.0008608010120951803, "clip_ratio/low_mean": 0.0010642087181622628, "clip_ratio/low_min": 5.5411504945368506e-05, "clip_ratio/region_mean": 0.0019250096811447293, "epoch": 0.11806637150073791, "grad_norm": 0.34462690353393555, "learning_rate": 2e-07, "loss": 0.041, "step": 1265 }, { "clip_ratio/high_max": 0.0024077000343822874, "clip_ratio/high_mean": 0.0009383187498315237, "clip_ratio/low_mean": 0.00111204542190535, "clip_ratio/low_min": 8.336667633557227e-06, "clip_ratio/region_mean": 0.002050364149909001, "epoch": 0.1181597046007385, "grad_norm": 0.2464223951101303, "learning_rate": 2e-07, "loss": -0.0096, "step": 1266 }, { "clip_ratio/high_max": 0.0025267136152251624, "clip_ratio/high_mean": 0.001028686594509054, "clip_ratio/low_mean": 0.001256505973287858, "clip_ratio/low_min": 9.591557500243653e-05, "clip_ratio/region_mean": 0.002285192553244997, "epoch": 0.11825303770073908, "grad_norm": 0.269366055727005, "learning_rate": 2e-07, "loss": -0.004, "step": 1267 }, { "clip_ratio/high_max": 0.002575017075287178, "clip_ratio/high_mean": 0.001032883894367842, "clip_ratio/low_mean": 0.001359586418402614, "clip_ratio/low_min": 4.977024218533188e-05, "clip_ratio/region_mean": 0.0023924702472868375, "epoch": 0.11834637080073966, "grad_norm": 0.39204010367393494, "learning_rate": 2e-07, "loss": -0.0023, "step": 1268 }, { "clip_ratio/high_max": 0.002847917392500676, "clip_ratio/high_mean": 0.0010294341227563564, "clip_ratio/low_mean": 0.001372111619275529, "clip_ratio/low_min": 0.00013289974776853342, "clip_ratio/region_mean": 0.0024015457529458217, "epoch": 0.11843970390074025, "grad_norm": 0.4098057150840759, "learning_rate": 2e-07, "loss": 0.0185, "step": 1269 }, { "clip_ratio/high_max": 0.0024046394100878388, "clip_ratio/high_mean": 0.0008807622689346317, "clip_ratio/low_mean": 0.001354486339550931, "clip_ratio/low_min": 9.513880468148272e-05, "clip_ratio/region_mean": 0.0022352485902956687, "epoch": 0.11853303700074083, "grad_norm": 0.3981229066848755, "learning_rate": 2e-07, "loss": 0.0462, "step": 1270 }, { "clip_ratio/high_max": 0.002377386554144323, "clip_ratio/high_mean": 0.0009606256753613707, "clip_ratio/low_mean": 0.0014499417884508148, "clip_ratio/low_min": 0.00019184387747372966, "clip_ratio/region_mean": 0.0024105674601742066, "epoch": 0.11862637010074141, "grad_norm": 0.3201233744621277, "learning_rate": 2e-07, "loss": 0.0349, "step": 1271 }, { "clip_ratio/high_max": 0.0021397609780251514, "clip_ratio/high_mean": 0.000795558151367004, "clip_ratio/low_mean": 0.0012330156278039794, "clip_ratio/low_min": 6.209020284586586e-06, "clip_ratio/region_mean": 0.002028573813731782, "epoch": 0.118719703200742, "grad_norm": 0.29944702982902527, "learning_rate": 2e-07, "loss": 0.0488, "step": 1272 }, { "clip_ratio/high_max": 0.0021472515290952288, "clip_ratio/high_mean": 0.000861019795138418, "clip_ratio/low_mean": 0.0014865241682855412, "clip_ratio/low_min": 0.0002455126068525715, "clip_ratio/region_mean": 0.002347543944779318, "epoch": 0.11881303630074258, "grad_norm": 0.3404085040092468, "learning_rate": 2e-07, "loss": 0.0127, "step": 1273 }, { "clip_ratio/high_max": 0.002644624728418421, "clip_ratio/high_mean": 0.0010759932010842022, "clip_ratio/low_mean": 0.0011998535046586767, "clip_ratio/low_min": 5.289918954076711e-05, "clip_ratio/region_mean": 0.0022758467093808576, "epoch": 0.11890636940074316, "grad_norm": 0.6112203598022461, "learning_rate": 2e-07, "loss": -0.0042, "step": 1274 }, { "clip_ratio/high_max": 0.0028735673913615756, "clip_ratio/high_mean": 0.001140658583608456, "clip_ratio/low_mean": 0.0013515157279471168, "clip_ratio/low_min": 9.274819421989378e-05, "clip_ratio/region_mean": 0.0024921743315644562, "epoch": 0.11899970250074375, "grad_norm": 0.3619697093963623, "learning_rate": 2e-07, "loss": -0.0096, "step": 1275 }, { "clip_ratio/high_max": 0.002722287201322615, "clip_ratio/high_mean": 0.0010953694445561268, "clip_ratio/low_mean": 0.00137232463566761, "clip_ratio/low_min": 9.765392314875498e-05, "clip_ratio/region_mean": 0.0024676940811332315, "epoch": 0.11909303560074433, "grad_norm": 1.1415901184082031, "learning_rate": 2e-07, "loss": 0.0313, "step": 1276 }, { "clip_ratio/high_max": 0.002542464619182283, "clip_ratio/high_mean": 0.0010773795056593372, "clip_ratio/low_mean": 0.001582915418111952, "clip_ratio/low_min": 0.00014981811273173662, "clip_ratio/region_mean": 0.0026602949074003845, "epoch": 0.11918636870074492, "grad_norm": 0.4163925051689148, "learning_rate": 2e-07, "loss": 0.0532, "step": 1277 }, { "clip_ratio/high_max": 0.0024797124206088483, "clip_ratio/high_mean": 0.0009754664370120736, "clip_ratio/low_mean": 0.001676218438660726, "clip_ratio/low_min": 7.32098515072721e-05, "clip_ratio/region_mean": 0.002651684859301895, "epoch": 0.1192797018007455, "grad_norm": 0.413104385137558, "learning_rate": 2e-07, "loss": 0.0228, "step": 1278 }, { "clip_ratio/high_max": 0.00320190660568187, "clip_ratio/high_mean": 0.0012071944656781852, "clip_ratio/low_mean": 0.001631128525332315, "clip_ratio/low_min": 0.0001381863148708362, "clip_ratio/region_mean": 0.002838323031028267, "epoch": 0.11937303490074608, "grad_norm": 0.34816280007362366, "learning_rate": 2e-07, "loss": 0.0178, "step": 1279 }, { "clip_ratio/high_max": 0.003162445886118803, "clip_ratio/high_mean": 0.001232248883752618, "clip_ratio/low_mean": 0.0018059698850265704, "clip_ratio/low_min": 0.00021825372459716164, "clip_ratio/region_mean": 0.003038218754227273, "epoch": 0.11946636800074667, "grad_norm": 0.46213099360466003, "learning_rate": 2e-07, "loss": 0.0346, "step": 1280 }, { "clip_ratio/high_max": 0.033636262523941696, "clip_ratio/high_mean": 0.014187191671226174, "clip_ratio/low_mean": 0.0018158269340347033, "clip_ratio/low_min": 5.904925819777418e-05, "clip_ratio/region_mean": 0.01600301859434694, "completions/clipped_ratio": 0.023960658482142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 618.6232299804688, "completions/mean_terminated_length": 533.2575073242188, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.11955970110074725, "grad_norm": 12078193.0, "learning_rate": 2e-07, "loss": 111408.4844, "num_tokens": 941270536.0, "reward": 0.5556553602218628, "reward_std": 0.17502625286579132, "rewards/simpleverify_reward/mean": 0.5556553602218628, "rewards/simpleverify_reward/std": 0.4968949556350708, "step": 1281 }, { "clip_ratio/high_max": 0.001757148675096687, "clip_ratio/high_mean": 0.0006821812876296462, "clip_ratio/low_mean": 0.0007092216746968916, "clip_ratio/low_min": 1.2659509593504481e-05, "clip_ratio/region_mean": 0.0013914029732404742, "epoch": 0.11965303420074783, "grad_norm": 0.21127934753894806, "learning_rate": 2e-07, "loss": 0.0623, "step": 1282 }, { "clip_ratio/high_max": 0.0017937738593900576, "clip_ratio/high_mean": 0.0007236842975544278, "clip_ratio/low_mean": 0.000609595124842599, "clip_ratio/low_min": 2.893673809012398e-05, "clip_ratio/region_mean": 0.001333279436948942, "epoch": 0.11974636730074842, "grad_norm": 0.19094645977020264, "learning_rate": 2e-07, "loss": 0.0149, "step": 1283 }, { "clip_ratio/high_max": 0.0017875819758046418, "clip_ratio/high_mean": 0.0006865628656669287, "clip_ratio/low_mean": 0.0005820919504913036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001268654828891158, "epoch": 0.119839700400749, "grad_norm": 0.27259427309036255, "learning_rate": 2e-07, "loss": 0.0542, "step": 1284 }, { "clip_ratio/high_max": 0.0016040740883909166, "clip_ratio/high_mean": 0.0007093386211636243, "clip_ratio/low_mean": 0.0006890046952321427, "clip_ratio/low_min": 2.821818270604126e-05, "clip_ratio/region_mean": 0.001398343334585661, "epoch": 0.11993303350074958, "grad_norm": 0.29669323563575745, "learning_rate": 2e-07, "loss": 0.0687, "step": 1285 }, { "clip_ratio/high_max": 0.0016907315839489456, "clip_ratio/high_mean": 0.0006684055915684439, "clip_ratio/low_mean": 0.0007403020954370731, "clip_ratio/low_min": 9.851828508544713e-06, "clip_ratio/region_mean": 0.0014087076888245065, "epoch": 0.12002636660075017, "grad_norm": 0.25619158148765564, "learning_rate": 2e-07, "loss": 0.0469, "step": 1286 }, { "clip_ratio/high_max": 0.0018188443209510297, "clip_ratio/high_mean": 0.0006201109990797704, "clip_ratio/low_mean": 0.0007741618210275192, "clip_ratio/low_min": 5.69612438994227e-05, "clip_ratio/region_mean": 0.001394272840116173, "epoch": 0.12011969970075075, "grad_norm": 0.2125396877527237, "learning_rate": 2e-07, "loss": 0.0564, "step": 1287 }, { "clip_ratio/high_max": 0.002026779613515828, "clip_ratio/high_mean": 0.000803885426648776, "clip_ratio/low_mean": 0.0006704076540700044, "clip_ratio/low_min": 2.8510792617453262e-05, "clip_ratio/region_mean": 0.001474293112551095, "epoch": 0.12021303280075134, "grad_norm": 0.2173265963792801, "learning_rate": 2e-07, "loss": 0.0251, "step": 1288 }, { "clip_ratio/high_max": 0.00189265031258401, "clip_ratio/high_mean": 0.0007191644272097619, "clip_ratio/low_mean": 0.0006233749118109699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013425393772195093, "epoch": 0.12030636590075192, "grad_norm": 0.22747761011123657, "learning_rate": 2e-07, "loss": 0.0092, "step": 1289 }, { "clip_ratio/high_max": 0.001521122070698766, "clip_ratio/high_mean": 0.0005974022587906802, "clip_ratio/low_mean": 0.000597670648858184, "clip_ratio/low_min": 6.374299391609384e-05, "clip_ratio/region_mean": 0.0011950728912779596, "epoch": 0.1203996990007525, "grad_norm": 0.270175576210022, "learning_rate": 2e-07, "loss": 0.0829, "step": 1290 }, { "clip_ratio/high_max": 0.0019364003237569705, "clip_ratio/high_mean": 0.0006787155243728193, "clip_ratio/low_mean": 0.0006841404301667353, "clip_ratio/low_min": 3.210352770111058e-05, "clip_ratio/region_mean": 0.0013628559681819752, "epoch": 0.12049303210075309, "grad_norm": 0.2402317225933075, "learning_rate": 2e-07, "loss": 0.043, "step": 1291 }, { "clip_ratio/high_max": 0.0018949249715660699, "clip_ratio/high_mean": 0.0006710470916004851, "clip_ratio/low_mean": 0.000702621329764952, "clip_ratio/low_min": 2.7315084480505902e-05, "clip_ratio/region_mean": 0.0013736684231844265, "epoch": 0.12058636520075366, "grad_norm": 0.3079681396484375, "learning_rate": 2e-07, "loss": 0.0306, "step": 1292 }, { "clip_ratio/high_max": 0.001634513377211988, "clip_ratio/high_mean": 0.000612127758358838, "clip_ratio/low_mean": 0.0007146379994082963, "clip_ratio/low_min": 4.088839705218561e-05, "clip_ratio/region_mean": 0.0013267657195683569, "epoch": 0.12067969830075424, "grad_norm": 0.23631501197814941, "learning_rate": 2e-07, "loss": 0.0818, "step": 1293 }, { "clip_ratio/high_max": 0.0020257626310922205, "clip_ratio/high_mean": 0.0007871269426686922, "clip_ratio/low_mean": 0.0007188580493675545, "clip_ratio/low_min": 4.595153313857736e-05, "clip_ratio/region_mean": 0.001505985015683109, "epoch": 0.12077303140075484, "grad_norm": 0.2371877133846283, "learning_rate": 2e-07, "loss": 0.024, "step": 1294 }, { "clip_ratio/high_max": 0.0017748676655173767, "clip_ratio/high_mean": 0.000702990719673835, "clip_ratio/low_mean": 0.0006596877556148684, "clip_ratio/low_min": 7.316324990824796e-05, "clip_ratio/region_mean": 0.0013626784493681043, "epoch": 0.12086636450075541, "grad_norm": 0.20498250424861908, "learning_rate": 2e-07, "loss": 0.0318, "step": 1295 }, { "clip_ratio/high_max": 0.0020530199035420083, "clip_ratio/high_mean": 0.0007222244967124425, "clip_ratio/low_mean": 0.0006987845181356533, "clip_ratio/low_min": 2.337336536584189e-05, "clip_ratio/region_mean": 0.0014210090121196117, "epoch": 0.12095969760075599, "grad_norm": 0.21127595007419586, "learning_rate": 2e-07, "loss": 0.0688, "step": 1296 }, { "clip_ratio/high_max": 0.0019531643301888835, "clip_ratio/high_mean": 0.0006723535625496879, "clip_ratio/low_mean": 0.0007116848373698303, "clip_ratio/low_min": 5.836957188876113e-05, "clip_ratio/region_mean": 0.0013840384199284017, "epoch": 0.12105303070075658, "grad_norm": 0.482972115278244, "learning_rate": 2e-07, "loss": 0.0362, "step": 1297 }, { "clip_ratio/high_max": 0.0019306067697471008, "clip_ratio/high_mean": 0.0007689784160902491, "clip_ratio/low_mean": 0.0006959293586987769, "clip_ratio/low_min": 3.0395374778890982e-05, "clip_ratio/region_mean": 0.00146490779297892, "epoch": 0.12114636380075716, "grad_norm": 0.31388309597969055, "learning_rate": 2e-07, "loss": 0.0877, "step": 1298 }, { "clip_ratio/high_max": 0.001762273657732294, "clip_ratio/high_mean": 0.0007248708125189296, "clip_ratio/low_mean": 0.0007124294297682354, "clip_ratio/low_min": 2.74315316346474e-05, "clip_ratio/region_mean": 0.0014373002450156491, "epoch": 0.12123969690075775, "grad_norm": 0.2976444363594055, "learning_rate": 2e-07, "loss": 0.0569, "step": 1299 }, { "clip_ratio/high_max": 0.0017979960175580345, "clip_ratio/high_mean": 0.0006309786349447677, "clip_ratio/low_mean": 0.0006848052034911234, "clip_ratio/low_min": 5.603728777714423e-05, "clip_ratio/region_mean": 0.0013157838329789229, "epoch": 0.12133303000075833, "grad_norm": 0.2182295024394989, "learning_rate": 2e-07, "loss": 0.0414, "step": 1300 }, { "clip_ratio/high_max": 0.002008143019338604, "clip_ratio/high_mean": 0.000882398520843708, "clip_ratio/low_mean": 0.0006994358900556108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001581834385433467, "epoch": 0.12142636310075891, "grad_norm": 0.23353268206119537, "learning_rate": 2e-07, "loss": 0.0239, "step": 1301 }, { "clip_ratio/high_max": 0.00187652758177137, "clip_ratio/high_mean": 0.0008260255526693072, "clip_ratio/low_mean": 0.0007375657824013615, "clip_ratio/low_min": 2.817170570779126e-05, "clip_ratio/region_mean": 0.001563591322337743, "epoch": 0.1215196962007595, "grad_norm": 0.2685752809047699, "learning_rate": 2e-07, "loss": 0.0636, "step": 1302 }, { "clip_ratio/high_max": 0.0020067645600647666, "clip_ratio/high_mean": 0.0007231156814668793, "clip_ratio/low_mean": 0.0008120148031593999, "clip_ratio/low_min": 7.850659039831953e-05, "clip_ratio/region_mean": 0.0015351305082731415, "epoch": 0.12161302930076008, "grad_norm": 1.0086857080459595, "learning_rate": 2e-07, "loss": 0.0683, "step": 1303 }, { "clip_ratio/high_max": 0.002134534122887999, "clip_ratio/high_mean": 0.0007982306960911956, "clip_ratio/low_mean": 0.0007991269303602166, "clip_ratio/low_min": 1.661350324866362e-05, "clip_ratio/region_mean": 0.0015973576228134334, "epoch": 0.12170636240076066, "grad_norm": 0.31662291288375854, "learning_rate": 2e-07, "loss": 0.0489, "step": 1304 }, { "clip_ratio/high_max": 0.0017364988234476186, "clip_ratio/high_mean": 0.0006627386810578173, "clip_ratio/low_mean": 0.0008237217953137588, "clip_ratio/low_min": 1.7952032067114487e-05, "clip_ratio/region_mean": 0.001486460467276629, "epoch": 0.12179969550076125, "grad_norm": 0.38843637704849243, "learning_rate": 2e-07, "loss": 0.0372, "step": 1305 }, { "clip_ratio/high_max": 0.0018198121397290379, "clip_ratio/high_mean": 0.0007805256609572098, "clip_ratio/low_mean": 0.0006921924505149946, "clip_ratio/low_min": 5.5522172260680236e-05, "clip_ratio/region_mean": 0.0014727181296620984, "epoch": 0.12189302860076183, "grad_norm": 0.2285003960132599, "learning_rate": 2e-07, "loss": 0.0124, "step": 1306 }, { "clip_ratio/high_max": 0.0022814342664787546, "clip_ratio/high_mean": 0.0008140117661241675, "clip_ratio/low_mean": 0.000778165711381007, "clip_ratio/low_min": 4.254353279975476e-05, "clip_ratio/region_mean": 0.0015921774502203334, "epoch": 0.12198636170076241, "grad_norm": 0.3073002099990845, "learning_rate": 2e-07, "loss": 0.0902, "step": 1307 }, { "clip_ratio/high_max": 0.0016679491782269906, "clip_ratio/high_mean": 0.0007282405331352493, "clip_ratio/low_mean": 0.0008117576289805584, "clip_ratio/low_min": 3.0046589927223977e-05, "clip_ratio/region_mean": 0.0015399981566588394, "epoch": 0.122079694800763, "grad_norm": 0.39821913838386536, "learning_rate": 2e-07, "loss": 0.0127, "step": 1308 }, { "clip_ratio/high_max": 0.00206142363458639, "clip_ratio/high_mean": 0.000798316777945729, "clip_ratio/low_mean": 0.0007695334861637093, "clip_ratio/low_min": 4.798694862984121e-05, "clip_ratio/region_mean": 0.0015678502168157138, "epoch": 0.12217302790076358, "grad_norm": 0.2792995870113373, "learning_rate": 2e-07, "loss": 0.0682, "step": 1309 }, { "clip_ratio/high_max": 0.0016825979073473718, "clip_ratio/high_mean": 0.0006769990586690255, "clip_ratio/low_mean": 0.000753710453864187, "clip_ratio/low_min": 4.154892667429522e-05, "clip_ratio/region_mean": 0.0014307094788819086, "epoch": 0.12226636100076417, "grad_norm": 0.2524963319301605, "learning_rate": 2e-07, "loss": 0.0337, "step": 1310 }, { "clip_ratio/high_max": 0.0021563444533967413, "clip_ratio/high_mean": 0.0007952545129228383, "clip_ratio/low_mean": 0.0008033812755456893, "clip_ratio/low_min": 2.5028905838553328e-05, "clip_ratio/region_mean": 0.001598635732079856, "epoch": 0.12235969410076475, "grad_norm": 0.3603135943412781, "learning_rate": 2e-07, "loss": -0.004, "step": 1311 }, { "clip_ratio/high_max": 0.0016751332441344857, "clip_ratio/high_mean": 0.0007291154070117045, "clip_ratio/low_mean": 0.0008382637497561518, "clip_ratio/low_min": 7.451120836776681e-06, "clip_ratio/region_mean": 0.0015673791676817928, "epoch": 0.12245302720076533, "grad_norm": 0.2564535439014435, "learning_rate": 2e-07, "loss": 0.052, "step": 1312 }, { "clip_ratio/high_max": 0.0019843452391796745, "clip_ratio/high_mean": 0.0007824366985005327, "clip_ratio/low_mean": 0.0008390289222006686, "clip_ratio/low_min": 5.71707514609443e-05, "clip_ratio/region_mean": 0.0016214656279771589, "epoch": 0.12254636030076592, "grad_norm": 0.27296802401542664, "learning_rate": 2e-07, "loss": 0.0429, "step": 1313 }, { "clip_ratio/high_max": 0.0018787092158163432, "clip_ratio/high_mean": 0.0006905629634275101, "clip_ratio/low_mean": 0.0007322320434468566, "clip_ratio/low_min": 7.550461941718822e-05, "clip_ratio/region_mean": 0.0014227949868654832, "epoch": 0.1226396934007665, "grad_norm": 0.242542564868927, "learning_rate": 2e-07, "loss": 0.0259, "step": 1314 }, { "clip_ratio/high_max": 0.001954390791070182, "clip_ratio/high_mean": 0.0008253508317466185, "clip_ratio/low_mean": 0.0008160023307937081, "clip_ratio/low_min": 3.258050764998188e-05, "clip_ratio/region_mean": 0.0016413531775469892, "epoch": 0.12273302650076708, "grad_norm": 52.56976318359375, "learning_rate": 2e-07, "loss": 0.0498, "step": 1315 }, { "clip_ratio/high_max": 0.0020414851533132605, "clip_ratio/high_mean": 0.000825727091068984, "clip_ratio/low_mean": 0.0009527973597869277, "clip_ratio/low_min": 6.130089877842693e-05, "clip_ratio/region_mean": 0.00177852445631288, "epoch": 0.12282635960076767, "grad_norm": 0.27530622482299805, "learning_rate": 2e-07, "loss": 0.0656, "step": 1316 }, { "clip_ratio/high_max": 0.001803757742891321, "clip_ratio/high_mean": 0.0007136169006116688, "clip_ratio/low_mean": 0.0009263413157896139, "clip_ratio/low_min": 5.682919800165109e-05, "clip_ratio/region_mean": 0.0016399581872974522, "epoch": 0.12291969270076825, "grad_norm": 0.28610602021217346, "learning_rate": 2e-07, "loss": 0.0514, "step": 1317 }, { "clip_ratio/high_max": 0.002272968915349338, "clip_ratio/high_mean": 0.0009091477677429793, "clip_ratio/low_mean": 0.0009976486107916571, "clip_ratio/low_min": 2.2899364921613596e-05, "clip_ratio/region_mean": 0.0019067964021814987, "epoch": 0.12301302580076884, "grad_norm": 0.281762957572937, "learning_rate": 2e-07, "loss": 0.0432, "step": 1318 }, { "clip_ratio/high_max": 0.0017071304646378849, "clip_ratio/high_mean": 0.0006421457019314403, "clip_ratio/low_mean": 0.0009136016069533071, "clip_ratio/low_min": 3.69376484741224e-05, "clip_ratio/region_mean": 0.001555747276142938, "epoch": 0.12310635890076942, "grad_norm": 0.4166424870491028, "learning_rate": 2e-07, "loss": 0.0907, "step": 1319 }, { "clip_ratio/high_max": 0.0020416418483364396, "clip_ratio/high_mean": 0.0008372932934435084, "clip_ratio/low_mean": 0.0008271236092696199, "clip_ratio/low_min": 7.730295692454092e-05, "clip_ratio/region_mean": 0.0016644168790662661, "epoch": 0.12319969200077, "grad_norm": 0.7874302864074707, "learning_rate": 2e-07, "loss": 0.0309, "step": 1320 }, { "clip_ratio/high_max": 0.002167759506846778, "clip_ratio/high_mean": 0.0007454313472408103, "clip_ratio/low_mean": 0.0009587386248313123, "clip_ratio/low_min": 1.3522284461942036e-05, "clip_ratio/region_mean": 0.0017041699720721226, "epoch": 0.12329302510077059, "grad_norm": 0.29603636264801025, "learning_rate": 2e-07, "loss": 0.0526, "step": 1321 }, { "clip_ratio/high_max": 0.0022808557314419886, "clip_ratio/high_mean": 0.0008713424695088179, "clip_ratio/low_mean": 0.0009923141551553272, "clip_ratio/low_min": 9.941825192072429e-05, "clip_ratio/region_mean": 0.0018636566055647563, "epoch": 0.12338635820077117, "grad_norm": 0.26004448533058167, "learning_rate": 2e-07, "loss": 0.0379, "step": 1322 }, { "clip_ratio/high_max": 0.0017804318013077136, "clip_ratio/high_mean": 0.0007413897074002307, "clip_ratio/low_mean": 0.0008614507332822541, "clip_ratio/low_min": 6.583530102943769e-05, "clip_ratio/region_mean": 0.001602840427949559, "epoch": 0.12347969130077174, "grad_norm": 0.25743335485458374, "learning_rate": 2e-07, "loss": 0.0132, "step": 1323 }, { "clip_ratio/high_max": 0.001918955189466942, "clip_ratio/high_mean": 0.0007236880646814825, "clip_ratio/low_mean": 0.0008195133468689164, "clip_ratio/low_min": 7.284461116796592e-05, "clip_ratio/region_mean": 0.001543201411550399, "epoch": 0.12357302440077234, "grad_norm": 0.35232871770858765, "learning_rate": 2e-07, "loss": 0.0933, "step": 1324 }, { "clip_ratio/high_max": 0.0019627074943855405, "clip_ratio/high_mean": 0.0006931725320100668, "clip_ratio/low_mean": 0.0008815954006422544, "clip_ratio/low_min": 2.658820267242845e-05, "clip_ratio/region_mean": 0.0015747679935884662, "epoch": 0.12366635750077291, "grad_norm": 0.3551845848560333, "learning_rate": 2e-07, "loss": 0.0459, "step": 1325 }, { "clip_ratio/high_max": 0.002006488408369478, "clip_ratio/high_mean": 0.0008612518067820929, "clip_ratio/low_mean": 0.0009049921827681828, "clip_ratio/low_min": 3.9592043322045356e-05, "clip_ratio/region_mean": 0.001766243985912297, "epoch": 0.12375969060077349, "grad_norm": 0.5212864279747009, "learning_rate": 2e-07, "loss": -0.0171, "step": 1326 }, { "clip_ratio/high_max": 0.002028292259637965, "clip_ratio/high_mean": 0.0008103689579002094, "clip_ratio/low_mean": 0.00113118836452486, "clip_ratio/low_min": 0.00011083176559623098, "clip_ratio/region_mean": 0.0019415573624428362, "epoch": 0.12385302370077408, "grad_norm": 0.35676780343055725, "learning_rate": 2e-07, "loss": 0.0789, "step": 1327 }, { "clip_ratio/high_max": 0.0018600714101921767, "clip_ratio/high_mean": 0.0008191145170712844, "clip_ratio/low_mean": 0.0011517696802911814, "clip_ratio/low_min": 0.00016133240569615737, "clip_ratio/region_mean": 0.0019708842373802327, "epoch": 0.12394635680077466, "grad_norm": 0.2637704014778137, "learning_rate": 2e-07, "loss": 0.0628, "step": 1328 }, { "clip_ratio/high_max": 0.0020112660276936367, "clip_ratio/high_mean": 0.0008812366268102778, "clip_ratio/low_mean": 0.0009420038386451779, "clip_ratio/low_min": 4.4494761823443696e-05, "clip_ratio/region_mean": 0.0018232404254376888, "epoch": 0.12403968990077525, "grad_norm": 0.7555397748947144, "learning_rate": 2e-07, "loss": 0.0629, "step": 1329 }, { "clip_ratio/high_max": 0.0023849813660490327, "clip_ratio/high_mean": 0.0008650260024296585, "clip_ratio/low_mean": 0.0010616773943183944, "clip_ratio/low_min": 5.0489512432250194e-05, "clip_ratio/region_mean": 0.0019267033858341165, "epoch": 0.12413302300077583, "grad_norm": 0.4563961625099182, "learning_rate": 2e-07, "loss": 0.0068, "step": 1330 }, { "clip_ratio/high_max": 0.001777242447133176, "clip_ratio/high_mean": 0.0007844591327739181, "clip_ratio/low_mean": 0.0011538661765371216, "clip_ratio/low_min": 3.338774877192918e-05, "clip_ratio/region_mean": 0.0019383253093110397, "epoch": 0.12422635610077641, "grad_norm": 1.0166248083114624, "learning_rate": 2e-07, "loss": 0.079, "step": 1331 }, { "clip_ratio/high_max": 0.002384343446465209, "clip_ratio/high_mean": 0.0009008032575366087, "clip_ratio/low_mean": 0.0011397356411180226, "clip_ratio/low_min": 3.090169866482029e-05, "clip_ratio/region_mean": 0.002040538915025536, "epoch": 0.124319689200777, "grad_norm": 0.4668191373348236, "learning_rate": 2e-07, "loss": 0.0284, "step": 1332 }, { "clip_ratio/high_max": 0.0021127900181454606, "clip_ratio/high_mean": 0.0009407284278495354, "clip_ratio/low_mean": 0.0012498247488110792, "clip_ratio/low_min": 0.00013673216199094895, "clip_ratio/region_mean": 0.00219055317575112, "epoch": 0.12441302230077758, "grad_norm": 0.9345513582229614, "learning_rate": 2e-07, "loss": 0.0425, "step": 1333 }, { "clip_ratio/high_max": 0.0015578489073959645, "clip_ratio/high_mean": 0.0005979709985695081, "clip_ratio/low_mean": 0.0013884701220376883, "clip_ratio/low_min": 5.219834201852791e-05, "clip_ratio/region_mean": 0.001986441100598313, "epoch": 0.12450635540077816, "grad_norm": 0.3814065754413605, "learning_rate": 2e-07, "loss": 0.075, "step": 1334 }, { "clip_ratio/high_max": 0.002161230044293916, "clip_ratio/high_mean": 0.0008774659618211444, "clip_ratio/low_mean": 0.0011680799834721256, "clip_ratio/low_min": 2.9156085474824067e-05, "clip_ratio/region_mean": 0.00204554594529327, "epoch": 0.12459968850077875, "grad_norm": 0.2788899540901184, "learning_rate": 2e-07, "loss": 0.0157, "step": 1335 }, { "clip_ratio/high_max": 0.0023304426431423053, "clip_ratio/high_mean": 0.0009230548748746514, "clip_ratio/low_mean": 0.0012638118332688464, "clip_ratio/low_min": 7.977026143635157e-06, "clip_ratio/region_mean": 0.0021868667245144024, "epoch": 0.12469302160077933, "grad_norm": 0.41839393973350525, "learning_rate": 2e-07, "loss": 0.0353, "step": 1336 }, { "clip_ratio/high_max": 0.0025382133026141673, "clip_ratio/high_mean": 0.0009690013430372346, "clip_ratio/low_mean": 0.0013258234248496592, "clip_ratio/low_min": 8.433274069830077e-05, "clip_ratio/region_mean": 0.0022948247205931693, "epoch": 0.12478635470077991, "grad_norm": 0.3450069725513458, "learning_rate": 2e-07, "loss": 0.0525, "step": 1337 }, { "clip_ratio/high_max": 0.002026578578806948, "clip_ratio/high_mean": 0.0007255108630488394, "clip_ratio/low_mean": 0.0010088756753248163, "clip_ratio/low_min": 5.659642101818463e-05, "clip_ratio/region_mean": 0.0017343865474686027, "epoch": 0.1248796878007805, "grad_norm": 0.5262999534606934, "learning_rate": 2e-07, "loss": 0.0497, "step": 1338 }, { "clip_ratio/high_max": 0.002392732127191266, "clip_ratio/high_mean": 0.0010224842626485042, "clip_ratio/low_mean": 0.0014065424620639533, "clip_ratio/low_min": 0.00013383975601755083, "clip_ratio/region_mean": 0.0024290266883326694, "epoch": 0.12497302090078108, "grad_norm": 1.024769902229309, "learning_rate": 2e-07, "loss": 0.0316, "step": 1339 }, { "clip_ratio/high_max": 0.002354182070121169, "clip_ratio/high_mean": 0.0009948725783033296, "clip_ratio/low_mean": 0.0011815804355137516, "clip_ratio/low_min": 9.019901244755602e-05, "clip_ratio/region_mean": 0.002176452981075272, "epoch": 0.12506635400078167, "grad_norm": 0.6699047684669495, "learning_rate": 2e-07, "loss": 0.0049, "step": 1340 }, { "clip_ratio/high_max": 0.0023776668531354517, "clip_ratio/high_mean": 0.0010117283309227787, "clip_ratio/low_mean": 0.0012840334529755637, "clip_ratio/low_min": 5.5601229178137146e-05, "clip_ratio/region_mean": 0.0022957617911743, "epoch": 0.12515968710078224, "grad_norm": 0.38643139600753784, "learning_rate": 2e-07, "loss": 0.0573, "step": 1341 }, { "clip_ratio/high_max": 0.0025596788618713617, "clip_ratio/high_mean": 0.0009837788238655776, "clip_ratio/low_mean": 0.0014672048382635694, "clip_ratio/low_min": 0.00016191880968108308, "clip_ratio/region_mean": 0.00245098362211138, "epoch": 0.12525302020078283, "grad_norm": 0.3206682503223419, "learning_rate": 2e-07, "loss": 0.0524, "step": 1342 }, { "clip_ratio/high_max": 0.002239757825009292, "clip_ratio/high_mean": 0.0009277489261876326, "clip_ratio/low_mean": 0.0013906829844927415, "clip_ratio/low_min": 0.00010912384095718153, "clip_ratio/region_mean": 0.002318431928870268, "epoch": 0.12534635330078342, "grad_norm": 0.8834324479103088, "learning_rate": 2e-07, "loss": 0.0341, "step": 1343 }, { "clip_ratio/high_max": 0.002785747972666286, "clip_ratio/high_mean": 0.0010081149157485925, "clip_ratio/low_mean": 0.0013623956583614927, "clip_ratio/low_min": 0.0002735084399319021, "clip_ratio/region_mean": 0.002370510555920191, "epoch": 0.125439686400784, "grad_norm": 0.3635942339897156, "learning_rate": 2e-07, "loss": 0.0492, "step": 1344 }, { "clip_ratio/high_max": 0.002569894986663712, "clip_ratio/high_mean": 0.0010378716942796018, "clip_ratio/low_mean": 0.0013031043745286297, "clip_ratio/low_min": 8.553877796657616e-05, "clip_ratio/region_mean": 0.0023409760906361043, "epoch": 0.12553301950078458, "grad_norm": 0.32739266753196716, "learning_rate": 2e-07, "loss": 0.0279, "step": 1345 }, { "clip_ratio/high_max": 0.0027577963046496734, "clip_ratio/high_mean": 0.0010262937685183715, "clip_ratio/low_mean": 0.0014175027827150188, "clip_ratio/low_min": 0.00010162354737985879, "clip_ratio/region_mean": 0.0024437965330434963, "epoch": 0.12562635260078517, "grad_norm": 0.450640469789505, "learning_rate": 2e-07, "loss": 0.0452, "step": 1346 }, { "clip_ratio/high_max": 0.002508274221327156, "clip_ratio/high_mean": 0.0010059725154860644, "clip_ratio/low_mean": 0.0016545239413972013, "clip_ratio/low_min": 7.452976387867238e-05, "clip_ratio/region_mean": 0.002660496385942679, "epoch": 0.12571968570078576, "grad_norm": 1.7429561614990234, "learning_rate": 2e-07, "loss": 0.0294, "step": 1347 }, { "clip_ratio/high_max": 0.00264312639774289, "clip_ratio/high_mean": 0.0009296898970205802, "clip_ratio/low_mean": 0.0015441433060914278, "clip_ratio/low_min": 0.00011590816029638518, "clip_ratio/region_mean": 0.0024738331776461564, "epoch": 0.12581301880078632, "grad_norm": 0.6041150689125061, "learning_rate": 2e-07, "loss": 0.0577, "step": 1348 }, { "clip_ratio/high_max": 0.002590553674963303, "clip_ratio/high_mean": 0.0010728382439992856, "clip_ratio/low_mean": 0.0017656338022788987, "clip_ratio/low_min": 1.8360751710133627e-05, "clip_ratio/region_mean": 0.0028384720280882902, "epoch": 0.12590635190078692, "grad_norm": 0.4069293439388275, "learning_rate": 2e-07, "loss": 0.0533, "step": 1349 }, { "clip_ratio/high_max": 0.0026527065710979514, "clip_ratio/high_mean": 0.0009589272431185236, "clip_ratio/low_mean": 0.0018918638124887366, "clip_ratio/low_min": 0.00012639217311516404, "clip_ratio/region_mean": 0.0028507910174084827, "epoch": 0.1259996850007875, "grad_norm": 0.6421383619308472, "learning_rate": 2e-07, "loss": 0.0635, "step": 1350 }, { "clip_ratio/high_max": 0.002398975771939149, "clip_ratio/high_mean": 0.0009691734758234816, "clip_ratio/low_mean": 0.0017611170187592506, "clip_ratio/low_min": 9.336669972981326e-05, "clip_ratio/region_mean": 0.002730290485487785, "epoch": 0.12609301810078807, "grad_norm": 0.4027394652366638, "learning_rate": 2e-07, "loss": 0.0323, "step": 1351 }, { "clip_ratio/high_max": 0.002671590023965109, "clip_ratio/high_mean": 0.0011251056821492966, "clip_ratio/low_mean": 0.0017431940541428048, "clip_ratio/low_min": 3.889718573191203e-05, "clip_ratio/region_mean": 0.002868299765395932, "epoch": 0.12618635120078867, "grad_norm": 0.47086450457572937, "learning_rate": 2e-07, "loss": 0.0264, "step": 1352 }, { "clip_ratio/high_max": 0.00303544523922028, "clip_ratio/high_mean": 0.0010380452295066789, "clip_ratio/low_mean": 0.0019605745728767943, "clip_ratio/low_min": 0.00022377767345460597, "clip_ratio/region_mean": 0.0029986197550897487, "epoch": 0.12627968430078926, "grad_norm": 0.42231351137161255, "learning_rate": 2e-07, "loss": 0.0537, "step": 1353 }, { "clip_ratio/high_max": 0.002713036992645357, "clip_ratio/high_mean": 0.0010575264968792908, "clip_ratio/low_mean": 0.002031062562309671, "clip_ratio/low_min": 0.0001831213776313234, "clip_ratio/region_mean": 0.0030885890882927924, "epoch": 0.12637301740078982, "grad_norm": 2.319950580596924, "learning_rate": 2e-07, "loss": 0.0481, "step": 1354 }, { "clip_ratio/high_max": 0.0026042971658171155, "clip_ratio/high_mean": 0.0010055225247924682, "clip_ratio/low_mean": 0.0022166963171912357, "clip_ratio/low_min": 0.00016186922948691063, "clip_ratio/region_mean": 0.003222218860173598, "epoch": 0.12646635050079041, "grad_norm": 0.38973361253738403, "learning_rate": 2e-07, "loss": 0.0659, "step": 1355 }, { "clip_ratio/high_max": 0.0028644218400586396, "clip_ratio/high_mean": 0.0010173107475566212, "clip_ratio/low_mean": 0.0021601985426968895, "clip_ratio/low_min": 0.00021948090397927444, "clip_ratio/region_mean": 0.0031775092938914895, "epoch": 0.126559683600791, "grad_norm": 0.5260111093521118, "learning_rate": 2e-07, "loss": 0.0618, "step": 1356 }, { "clip_ratio/high_max": 0.002534827624913305, "clip_ratio/high_mean": 0.0010179266955674393, "clip_ratio/low_mean": 0.0020294062451284844, "clip_ratio/low_min": 0.00017540973567520268, "clip_ratio/region_mean": 0.003047332924325019, "epoch": 0.12665301670079157, "grad_norm": 0.36452606320381165, "learning_rate": 2e-07, "loss": 0.0421, "step": 1357 }, { "clip_ratio/high_max": 0.003283579324488528, "clip_ratio/high_mean": 0.0011979314840573352, "clip_ratio/low_mean": 0.002413402657111874, "clip_ratio/low_min": 0.00010176395153393969, "clip_ratio/region_mean": 0.0036113340611336753, "epoch": 0.12674634980079216, "grad_norm": 0.6362717747688293, "learning_rate": 2e-07, "loss": 0.0543, "step": 1358 }, { "clip_ratio/high_max": 0.0031596681365044788, "clip_ratio/high_mean": 0.0012051305529894307, "clip_ratio/low_mean": 0.0023653525022382382, "clip_ratio/low_min": 0.00023573484213557094, "clip_ratio/region_mean": 0.0035704830297618173, "epoch": 0.12683968290079276, "grad_norm": 0.4454534649848938, "learning_rate": 2e-07, "loss": 0.0408, "step": 1359 }, { "clip_ratio/high_max": 0.002676119329407811, "clip_ratio/high_mean": 0.001154763331214781, "clip_ratio/low_mean": 0.0020823837694479153, "clip_ratio/low_min": 0.0002069328875222709, "clip_ratio/region_mean": 0.0032371472334489226, "epoch": 0.12693301600079332, "grad_norm": 0.5644140839576721, "learning_rate": 2e-07, "loss": 0.0541, "step": 1360 }, { "clip_ratio/high_max": 0.0027585992866079323, "clip_ratio/high_mean": 0.0011558113146747928, "clip_ratio/low_mean": 0.002482581745425705, "clip_ratio/low_min": 0.00015319200792873744, "clip_ratio/region_mean": 0.003638393129222095, "epoch": 0.1270263491007939, "grad_norm": 0.8527132868766785, "learning_rate": 2e-07, "loss": 0.0672, "step": 1361 }, { "clip_ratio/high_max": 0.003196263212885242, "clip_ratio/high_mean": 0.0011651565582724288, "clip_ratio/low_mean": 0.0023582407520734705, "clip_ratio/low_min": 0.0002619697497721063, "clip_ratio/region_mean": 0.003523397332173772, "epoch": 0.1271196822007945, "grad_norm": 0.5256268382072449, "learning_rate": 2e-07, "loss": 0.0138, "step": 1362 }, { "clip_ratio/high_max": 0.003100650050328113, "clip_ratio/high_mean": 0.0013356989111343864, "clip_ratio/low_mean": 0.0027006478048861027, "clip_ratio/low_min": 0.00011838667705887929, "clip_ratio/region_mean": 0.004036346756038256, "epoch": 0.12721301530079507, "grad_norm": 2.531118154525757, "learning_rate": 2e-07, "loss": 0.0278, "step": 1363 }, { "clip_ratio/high_max": 0.0028732038917951286, "clip_ratio/high_mean": 0.00128830639005173, "clip_ratio/low_mean": 0.002406457300821785, "clip_ratio/low_min": 0.00011435964916017838, "clip_ratio/region_mean": 0.0036947636399418116, "epoch": 0.12730634840079566, "grad_norm": 1.0263330936431885, "learning_rate": 2e-07, "loss": 0.0556, "step": 1364 }, { "clip_ratio/high_max": 0.0031363028538180515, "clip_ratio/high_mean": 0.0012643065929296426, "clip_ratio/low_mean": 0.0026590454654069617, "clip_ratio/low_min": 0.0005984568597341422, "clip_ratio/region_mean": 0.0039233520947163925, "epoch": 0.12739968150079625, "grad_norm": 3.830597400665283, "learning_rate": 2e-07, "loss": 0.054, "step": 1365 }, { "clip_ratio/high_max": 0.0027876336098415777, "clip_ratio/high_mean": 0.0011439816480560694, "clip_ratio/low_mean": 0.002571844703197712, "clip_ratio/low_min": 0.00014298139285529032, "clip_ratio/region_mean": 0.0037158263367018662, "epoch": 0.12749301460079684, "grad_norm": 114.3066635131836, "learning_rate": 2e-07, "loss": 0.048, "step": 1366 }, { "clip_ratio/high_max": 0.0028859059239039198, "clip_ratio/high_mean": 0.0013933537738921586, "clip_ratio/low_mean": 0.002734703251917381, "clip_ratio/low_min": 0.0004924963286612183, "clip_ratio/region_mean": 0.004128057087655179, "epoch": 0.1275863477007974, "grad_norm": 0.9545043706893921, "learning_rate": 2e-07, "loss": 0.0823, "step": 1367 }, { "clip_ratio/high_max": 0.0029186317697167397, "clip_ratio/high_mean": 0.0011734642102965154, "clip_ratio/low_mean": 0.0031680052561569028, "clip_ratio/low_min": 0.00021352353905967902, "clip_ratio/region_mean": 0.0043414695537649095, "epoch": 0.127679680800798, "grad_norm": 1.8541851043701172, "learning_rate": 2e-07, "loss": 0.0597, "step": 1368 }, { "clip_ratio/high_max": 0.0030309347494039685, "clip_ratio/high_mean": 0.0012215992210258264, "clip_ratio/low_mean": 0.002775567460048478, "clip_ratio/low_min": 0.0001850656008173246, "clip_ratio/region_mean": 0.003997166626504622, "epoch": 0.1277730139007986, "grad_norm": 0.642250657081604, "learning_rate": 2e-07, "loss": 0.0445, "step": 1369 }, { "clip_ratio/high_max": 0.003637999834609218, "clip_ratio/high_mean": 0.0015648667322238907, "clip_ratio/low_mean": 0.002479783237504307, "clip_ratio/low_min": 7.660453411517665e-05, "clip_ratio/region_mean": 0.004044649889692664, "epoch": 0.12786634700079916, "grad_norm": 0.5624629259109497, "learning_rate": 2e-07, "loss": 0.0314, "step": 1370 }, { "clip_ratio/high_max": 0.0035111106481053866, "clip_ratio/high_mean": 0.0016173406402231194, "clip_ratio/low_mean": 0.0027622288107522763, "clip_ratio/low_min": 0.00010209899664914701, "clip_ratio/region_mean": 0.004379569450975396, "epoch": 0.12795968010079975, "grad_norm": 3.477973461151123, "learning_rate": 2e-07, "loss": 0.0348, "step": 1371 }, { "clip_ratio/high_max": 0.003616042355133686, "clip_ratio/high_mean": 0.0014225406957848463, "clip_ratio/low_mean": 0.0027355257770977914, "clip_ratio/low_min": 0.00018423074652673677, "clip_ratio/region_mean": 0.004158066381933168, "epoch": 0.12805301320080034, "grad_norm": 0.48430582880973816, "learning_rate": 2e-07, "loss": 0.0282, "step": 1372 }, { "clip_ratio/high_max": 0.0038695161347277462, "clip_ratio/high_mean": 0.0016061990827438422, "clip_ratio/low_mean": 0.0022497429526993074, "clip_ratio/low_min": 6.129045323177706e-05, "clip_ratio/region_mean": 0.0038559419626835734, "epoch": 0.1281463463008009, "grad_norm": 1.27579927444458, "learning_rate": 2e-07, "loss": -0.0089, "step": 1373 }, { "clip_ratio/high_max": 0.0038353272393578663, "clip_ratio/high_mean": 0.0014430596747843083, "clip_ratio/low_mean": 0.0030794607591815293, "clip_ratio/low_min": 0.0002923455394920893, "clip_ratio/region_mean": 0.004522520466707647, "epoch": 0.1282396794008015, "grad_norm": 0.6043083667755127, "learning_rate": 2e-07, "loss": 0.0341, "step": 1374 }, { "clip_ratio/high_max": 0.0038286989874904975, "clip_ratio/high_mean": 0.0016365228038921487, "clip_ratio/low_mean": 0.0029189629494794644, "clip_ratio/low_min": 0.00018754915436147712, "clip_ratio/region_mean": 0.004555485749733634, "epoch": 0.1283330125008021, "grad_norm": 1.2715624570846558, "learning_rate": 2e-07, "loss": 0.0424, "step": 1375 }, { "clip_ratio/high_max": 0.003996309489593841, "clip_ratio/high_mean": 0.001487261146394303, "clip_ratio/low_mean": 0.003512060757202562, "clip_ratio/low_min": 0.00027828013844555244, "clip_ratio/region_mean": 0.004999321841751225, "epoch": 0.12842634560080265, "grad_norm": 1.0847305059432983, "learning_rate": 2e-07, "loss": 0.0616, "step": 1376 }, { "clip_ratio/high_max": 0.003426998940994963, "clip_ratio/high_mean": 0.0015868897462496534, "clip_ratio/low_mean": 0.003159518222673796, "clip_ratio/low_min": 0.0004187214744888479, "clip_ratio/region_mean": 0.004746407925267704, "epoch": 0.12851967870080325, "grad_norm": 0.8347870111465454, "learning_rate": 2e-07, "loss": 0.0532, "step": 1377 }, { "clip_ratio/high_max": 0.00401118399167899, "clip_ratio/high_mean": 0.0015423135591845494, "clip_ratio/low_mean": 0.003695669034641469, "clip_ratio/low_min": 0.0002078050238196738, "clip_ratio/region_mean": 0.005237982579274103, "epoch": 0.12861301180080384, "grad_norm": 7864.328125, "learning_rate": 2e-07, "loss": 0.2554, "step": 1378 }, { "clip_ratio/high_max": 0.0034559860978333745, "clip_ratio/high_mean": 0.0013092727658658987, "clip_ratio/low_mean": 0.0035334619824425317, "clip_ratio/low_min": 0.00045110837527317926, "clip_ratio/region_mean": 0.004842734691919759, "epoch": 0.1287063449008044, "grad_norm": 0.6829981207847595, "learning_rate": 2e-07, "loss": 0.06, "step": 1379 }, { "clip_ratio/high_max": 0.003859608063066844, "clip_ratio/high_mean": 0.0016222514350374695, "clip_ratio/low_mean": 0.00332366850489052, "clip_ratio/low_min": 0.0002849134907592088, "clip_ratio/region_mean": 0.004945919936290011, "epoch": 0.128799678000805, "grad_norm": 0.559385359287262, "learning_rate": 2e-07, "loss": 0.0263, "step": 1380 }, { "clip_ratio/high_max": 0.003397942375158891, "clip_ratio/high_mean": 0.001415162165358197, "clip_ratio/low_mean": 0.0032363546233682428, "clip_ratio/low_min": 0.0003124932190985419, "clip_ratio/region_mean": 0.004651516763260588, "epoch": 0.1288930111008056, "grad_norm": 0.9944037795066833, "learning_rate": 2e-07, "loss": 0.0614, "step": 1381 }, { "clip_ratio/high_max": 0.004276193867553957, "clip_ratio/high_mean": 0.0017224744697159622, "clip_ratio/low_mean": 0.003566123719792813, "clip_ratio/low_min": 0.0003244434774387628, "clip_ratio/region_mean": 0.005288598273182288, "epoch": 0.12898634420080615, "grad_norm": 1.682273507118225, "learning_rate": 2e-07, "loss": 0.0242, "step": 1382 }, { "clip_ratio/high_max": 0.0033961466906475835, "clip_ratio/high_mean": 0.0016357606509700418, "clip_ratio/low_mean": 0.003744128203834407, "clip_ratio/low_min": 0.0001531709531263914, "clip_ratio/region_mean": 0.005379888927564025, "epoch": 0.12907967730080674, "grad_norm": 0.7601098418235779, "learning_rate": 2e-07, "loss": 0.0208, "step": 1383 }, { "clip_ratio/high_max": 0.0040022009634412825, "clip_ratio/high_mean": 0.0016266211096080951, "clip_ratio/low_mean": 0.003614822155213915, "clip_ratio/low_min": 7.809624366927892e-05, "clip_ratio/region_mean": 0.005241443199338391, "epoch": 0.12917301040080734, "grad_norm": 1.0077459812164307, "learning_rate": 2e-07, "loss": 0.0411, "step": 1384 }, { "clip_ratio/high_max": 0.0045463819114957005, "clip_ratio/high_mean": 0.0019163090983056463, "clip_ratio/low_mean": 0.003707854644744657, "clip_ratio/low_min": 0.00029892163365730084, "clip_ratio/region_mean": 0.005624163881293498, "epoch": 0.1292663435008079, "grad_norm": 1.0528879165649414, "learning_rate": 2e-07, "loss": 0.0136, "step": 1385 }, { "clip_ratio/high_max": 0.004596721177222207, "clip_ratio/high_mean": 0.0019079614685324486, "clip_ratio/low_mean": 0.0037388546697911806, "clip_ratio/low_min": 0.0002499071379133966, "clip_ratio/region_mean": 0.005646816221997142, "epoch": 0.1293596766008085, "grad_norm": 0.7254329919815063, "learning_rate": 2e-07, "loss": 0.0007, "step": 1386 }, { "clip_ratio/high_max": 0.0046409419883275405, "clip_ratio/high_mean": 0.00197995436610654, "clip_ratio/low_mean": 0.004078065787325613, "clip_ratio/low_min": 0.00010447280783409951, "clip_ratio/region_mean": 0.006058020080672577, "epoch": 0.12945300970080909, "grad_norm": 5.374253273010254, "learning_rate": 2e-07, "loss": 0.0605, "step": 1387 }, { "clip_ratio/high_max": 0.004603522989782505, "clip_ratio/high_mean": 0.001997970164666185, "clip_ratio/low_mean": 0.003666163232992403, "clip_ratio/low_min": 8.293707651318982e-05, "clip_ratio/region_mean": 0.005664133408572525, "epoch": 0.12954634280080968, "grad_norm": 3.4913594722747803, "learning_rate": 2e-07, "loss": 0.0354, "step": 1388 }, { "clip_ratio/high_max": 0.004369210837467108, "clip_ratio/high_mean": 0.001770555285474984, "clip_ratio/low_mean": 0.0030628961394540966, "clip_ratio/low_min": 0.0001617876478121616, "clip_ratio/region_mean": 0.004833451297599822, "epoch": 0.12963967590081024, "grad_norm": 0.9306317567825317, "learning_rate": 2e-07, "loss": 0.0789, "step": 1389 }, { "clip_ratio/high_max": 0.0033555475674802437, "clip_ratio/high_mean": 0.0014689973068016116, "clip_ratio/low_mean": 0.004944380416418426, "clip_ratio/low_min": 0.00040157132025342435, "clip_ratio/region_mean": 0.006413377675926313, "epoch": 0.12973300900081083, "grad_norm": 0.6786685585975647, "learning_rate": 2e-07, "loss": 0.0502, "step": 1390 }, { "clip_ratio/high_max": 0.004791298517375253, "clip_ratio/high_mean": 0.0020858020507148467, "clip_ratio/low_mean": 0.004148053427343257, "clip_ratio/low_min": 0.00022182316024554893, "clip_ratio/region_mean": 0.006233855412574485, "epoch": 0.12982634210081143, "grad_norm": 3.305159330368042, "learning_rate": 2e-07, "loss": 0.0432, "step": 1391 }, { "clip_ratio/high_max": 0.004473034634429496, "clip_ratio/high_mean": 0.002165060257539153, "clip_ratio/low_mean": 0.004114166084036697, "clip_ratio/low_min": 0.00018270008877152577, "clip_ratio/region_mean": 0.006279226450715214, "epoch": 0.129919675200812, "grad_norm": 0.7855595350265503, "learning_rate": 2e-07, "loss": 0.0229, "step": 1392 }, { "clip_ratio/high_max": 0.0043141168061993085, "clip_ratio/high_mean": 0.0018456328489264706, "clip_ratio/low_mean": 0.004047504371555988, "clip_ratio/low_min": 0.00035311139799887314, "clip_ratio/region_mean": 0.005893137291423045, "epoch": 0.13001300830081258, "grad_norm": 2.176729917526245, "learning_rate": 2e-07, "loss": 0.0068, "step": 1393 }, { "clip_ratio/high_max": 0.0052060490706935525, "clip_ratio/high_mean": 0.002251189660455566, "clip_ratio/low_mean": 0.004593905745423399, "clip_ratio/low_min": 0.00034324987609579694, "clip_ratio/region_mean": 0.006845095427706838, "epoch": 0.13010634140081317, "grad_norm": 0.8071860074996948, "learning_rate": 2e-07, "loss": -0.0018, "step": 1394 }, { "clip_ratio/high_max": 0.00486210516828578, "clip_ratio/high_mean": 0.0021418071337393485, "clip_ratio/low_mean": 0.004519600552157499, "clip_ratio/low_min": 0.00044231839274289086, "clip_ratio/region_mean": 0.0066614077222766355, "epoch": 0.13019967450081374, "grad_norm": 0.8564684987068176, "learning_rate": 2e-07, "loss": 0.0233, "step": 1395 }, { "clip_ratio/high_max": 0.00514608999947086, "clip_ratio/high_mean": 0.002337209414690733, "clip_ratio/low_mean": 0.004481221083551645, "clip_ratio/low_min": 0.00038196273817447945, "clip_ratio/region_mean": 0.006818430440034717, "epoch": 0.13029300760081433, "grad_norm": 0.7765283584594727, "learning_rate": 2e-07, "loss": 0.0311, "step": 1396 }, { "clip_ratio/high_max": 0.00541700805479195, "clip_ratio/high_mean": 0.0022175691701704636, "clip_ratio/low_mean": 0.004954605275997892, "clip_ratio/low_min": 0.00016486383537994698, "clip_ratio/region_mean": 0.007172174577135593, "epoch": 0.13038634070081492, "grad_norm": 0.8650400042533875, "learning_rate": 2e-07, "loss": 0.0294, "step": 1397 }, { "clip_ratio/high_max": 0.004295728205761407, "clip_ratio/high_mean": 0.0019153212633682415, "clip_ratio/low_mean": 0.005195003774133511, "clip_ratio/low_min": 0.0008627111237728968, "clip_ratio/region_mean": 0.007110325008397922, "epoch": 0.1304796738008155, "grad_norm": 2.9553794860839844, "learning_rate": 2e-07, "loss": 0.0292, "step": 1398 }, { "clip_ratio/high_max": 0.005520305625395849, "clip_ratio/high_mean": 0.002344898290175479, "clip_ratio/low_mean": 0.005007434083381668, "clip_ratio/low_min": 0.0006360517054417869, "clip_ratio/region_mean": 0.007352332278969698, "epoch": 0.13057300690081608, "grad_norm": 0.9109849333763123, "learning_rate": 2e-07, "loss": 0.0394, "step": 1399 }, { "clip_ratio/high_max": 0.004168804924120195, "clip_ratio/high_mean": 0.0018622530005814042, "clip_ratio/low_mean": 0.004975227107934188, "clip_ratio/low_min": 0.0007623797937412746, "clip_ratio/region_mean": 0.006837480163085274, "epoch": 0.13066634000081667, "grad_norm": 0.7531682252883911, "learning_rate": 2e-07, "loss": 0.0893, "step": 1400 }, { "clip_ratio/high_max": 0.0041679345013108104, "clip_ratio/high_mean": 0.0018102321064361604, "clip_ratio/low_mean": 0.005287512714858167, "clip_ratio/low_min": 0.00037659023655578494, "clip_ratio/region_mean": 0.007097744950442575, "epoch": 0.13075967310081724, "grad_norm": 3.531359910964966, "learning_rate": 2e-07, "loss": 0.0632, "step": 1401 }, { "clip_ratio/high_max": 0.004772643856995273, "clip_ratio/high_mean": 0.0019076236640103161, "clip_ratio/low_mean": 0.00559662512387149, "clip_ratio/low_min": 0.0008603511105320649, "clip_ratio/region_mean": 0.007504248729674146, "epoch": 0.13085300620081783, "grad_norm": 1.369084358215332, "learning_rate": 2e-07, "loss": 0.0754, "step": 1402 }, { "clip_ratio/high_max": 0.005707138130674139, "clip_ratio/high_mean": 0.002062457409920171, "clip_ratio/low_mean": 0.0049517017614562064, "clip_ratio/low_min": 0.0004519810536294244, "clip_ratio/region_mean": 0.007014159156824462, "epoch": 0.13094633930081842, "grad_norm": 0.8947897553443909, "learning_rate": 2e-07, "loss": 0.0261, "step": 1403 }, { "clip_ratio/high_max": 0.005706018098862842, "clip_ratio/high_mean": 0.002277906245581107, "clip_ratio/low_mean": 0.005606733146123588, "clip_ratio/low_min": 0.001022373322484782, "clip_ratio/region_mean": 0.00788463931530714, "epoch": 0.13103967240081899, "grad_norm": 1.3503310680389404, "learning_rate": 2e-07, "loss": 0.0722, "step": 1404 }, { "clip_ratio/high_max": 0.005204361514188349, "clip_ratio/high_mean": 0.0021775253117084503, "clip_ratio/low_mean": 0.005372596890083514, "clip_ratio/low_min": 0.0005749110314354766, "clip_ratio/region_mean": 0.0075501223909668624, "epoch": 0.13113300550081958, "grad_norm": 1.8905260562896729, "learning_rate": 2e-07, "loss": 0.0344, "step": 1405 }, { "clip_ratio/high_max": 0.005523397616343573, "clip_ratio/high_mean": 0.002568270814663265, "clip_ratio/low_mean": 0.00610120888450183, "clip_ratio/low_min": 0.00024570243840571493, "clip_ratio/region_mean": 0.008669479633681476, "epoch": 0.13122633860082017, "grad_norm": 2.2963099479675293, "learning_rate": 2e-07, "loss": 0.0479, "step": 1406 }, { "clip_ratio/high_max": 0.0056981727975653484, "clip_ratio/high_mean": 0.0023827111508580856, "clip_ratio/low_mean": 0.005824722975376062, "clip_ratio/low_min": 0.0002989589120261371, "clip_ratio/region_mean": 0.00820743408985436, "epoch": 0.13131967170082076, "grad_norm": 1.7729641199111938, "learning_rate": 2e-07, "loss": 0.0417, "step": 1407 }, { "clip_ratio/high_max": 0.005736575796618126, "clip_ratio/high_mean": 0.002538405082304962, "clip_ratio/low_mean": 0.006702297818264924, "clip_ratio/low_min": 0.0005772056538262405, "clip_ratio/region_mean": 0.009240702929673716, "epoch": 0.13141300480082133, "grad_norm": 4.276357173919678, "learning_rate": 2e-07, "loss": 0.0561, "step": 1408 }, { "clip_ratio/high_max": 0.0022858290030853823, "clip_ratio/high_mean": 0.0007739901957393158, "clip_ratio/low_mean": 0.0008203464412872563, "clip_ratio/low_min": 9.448223863728344e-06, "clip_ratio/region_mean": 0.0015943366233841516, "completions/clipped_ratio": 0.040867396763392905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 514.756103515625, "completions/mean_terminated_length": 362.1639709472656, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "epoch": 0.13150633790082192, "grad_norm": 0.37589943408966064, "learning_rate": 2e-07, "loss": 0.0622, "num_tokens": 1011679701.0, "reward": 0.3563319742679596, "reward_std": 0.15777914226055145, "rewards/simpleverify_reward/mean": 0.3563319742679596, "rewards/simpleverify_reward/std": 0.4789169728755951, "step": 1409 }, { "clip_ratio/high_max": 0.0020147146606177557, "clip_ratio/high_mean": 0.0006640896626777248, "clip_ratio/low_mean": 0.000886892239577719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015509819022554439, "epoch": 0.1315996710008225, "grad_norm": 0.3647271394729614, "learning_rate": 2e-07, "loss": 0.0364, "step": 1410 }, { "clip_ratio/high_max": 0.0019205837525078095, "clip_ratio/high_mean": 0.0006850848631074768, "clip_ratio/low_mean": 0.0007341512482526014, "clip_ratio/low_min": 1.8074031686410308e-05, "clip_ratio/region_mean": 0.001419236108631594, "epoch": 0.13169300410082307, "grad_norm": 0.2974034547805786, "learning_rate": 2e-07, "loss": 0.0509, "step": 1411 }, { "clip_ratio/high_max": 0.002813302220602054, "clip_ratio/high_mean": 0.0009271139242628124, "clip_ratio/low_mean": 0.000787550066888798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017146640057035256, "epoch": 0.13178633720082367, "grad_norm": 0.3826490044593811, "learning_rate": 2e-07, "loss": 0.0883, "step": 1412 }, { "clip_ratio/high_max": 0.002181429234042298, "clip_ratio/high_mean": 0.0007425126314046793, "clip_ratio/low_mean": 0.0008070146213867702, "clip_ratio/low_min": 3.332349660922773e-05, "clip_ratio/region_mean": 0.0015495272855332587, "epoch": 0.13187967030082426, "grad_norm": 0.3387027382850647, "learning_rate": 2e-07, "loss": 0.0311, "step": 1413 }, { "clip_ratio/high_max": 0.001755005770974094, "clip_ratio/high_mean": 0.0005431728550320258, "clip_ratio/low_mean": 0.0007685362911615812, "clip_ratio/low_min": 1.966645686479751e-05, "clip_ratio/region_mean": 0.0013117091257299762, "epoch": 0.13197300340082482, "grad_norm": 0.3229408264160156, "learning_rate": 2e-07, "loss": 0.1079, "step": 1414 }, { "clip_ratio/high_max": 0.0028391996675054543, "clip_ratio/high_mean": 0.000890133900611545, "clip_ratio/low_mean": 0.0009665957732067909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018567296465334948, "epoch": 0.13206633650082542, "grad_norm": 2.079270124435425, "learning_rate": 2e-07, "loss": 0.0596, "step": 1415 }, { "clip_ratio/high_max": 0.002113057144015329, "clip_ratio/high_mean": 0.0007430680743709672, "clip_ratio/low_mean": 0.0007185060949268518, "clip_ratio/low_min": 3.0110419174889103e-05, "clip_ratio/region_mean": 0.0014615741820307449, "epoch": 0.132159669600826, "grad_norm": 0.4793534576892853, "learning_rate": 2e-07, "loss": 0.0524, "step": 1416 }, { "clip_ratio/high_max": 0.0019793916180788074, "clip_ratio/high_mean": 0.0007194257923401892, "clip_ratio/low_mean": 0.0009732189391797874, "clip_ratio/low_min": 4.096811517229071e-05, "clip_ratio/region_mean": 0.0016926447278819978, "epoch": 0.13225300270082657, "grad_norm": 0.5610447525978088, "learning_rate": 2e-07, "loss": 0.043, "step": 1417 }, { "clip_ratio/high_max": 0.0021653680778399576, "clip_ratio/high_mean": 0.0008574580424465239, "clip_ratio/low_mean": 0.0008970333965407917, "clip_ratio/low_min": 3.0658032301289495e-05, "clip_ratio/region_mean": 0.0017544914517202415, "epoch": 0.13234633580082716, "grad_norm": 0.3877805471420288, "learning_rate": 2e-07, "loss": 0.0694, "step": 1418 }, { "clip_ratio/high_max": 0.0021148373634787276, "clip_ratio/high_mean": 0.0006996362812969892, "clip_ratio/low_mean": 0.0011564133419597056, "clip_ratio/low_min": 9.15080545382807e-06, "clip_ratio/region_mean": 0.0018560496500867885, "epoch": 0.13243966890082776, "grad_norm": 1.9866297245025635, "learning_rate": 2e-07, "loss": 0.0636, "step": 1419 }, { "clip_ratio/high_max": 0.001726624119328335, "clip_ratio/high_mean": 0.0006377351528499275, "clip_ratio/low_mean": 0.0010235778099740855, "clip_ratio/low_min": 2.980169483635109e-05, "clip_ratio/region_mean": 0.0016613129700999707, "epoch": 0.13253300200082832, "grad_norm": 0.40588143467903137, "learning_rate": 2e-07, "loss": 0.0754, "step": 1420 }, { "clip_ratio/high_max": 0.002501332273823209, "clip_ratio/high_mean": 0.0008535894012311473, "clip_ratio/low_mean": 0.0010554007167229429, "clip_ratio/low_min": 6.904629026394105e-05, "clip_ratio/region_mean": 0.001908990103402175, "epoch": 0.1326263351008289, "grad_norm": 0.6843695640563965, "learning_rate": 2e-07, "loss": 0.0603, "step": 1421 }, { "clip_ratio/high_max": 0.002225620748504298, "clip_ratio/high_mean": 0.000803230912424624, "clip_ratio/low_mean": 0.0011859491969516966, "clip_ratio/low_min": 4.97936703141022e-05, "clip_ratio/region_mean": 0.0019891801348421723, "epoch": 0.1327196682008295, "grad_norm": 8.613897323608398, "learning_rate": 2e-07, "loss": 0.06, "step": 1422 }, { "clip_ratio/high_max": 0.0020461132371565327, "clip_ratio/high_mean": 0.0007648333066754276, "clip_ratio/low_mean": 0.0009778306066436926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017426638660253957, "epoch": 0.13281300130083007, "grad_norm": 4.5761308670043945, "learning_rate": 2e-07, "loss": 0.032, "step": 1423 }, { "clip_ratio/high_max": 0.0025704339550429722, "clip_ratio/high_mean": 0.0007804412794030213, "clip_ratio/low_mean": 0.0009155608304354246, "clip_ratio/low_min": 9.745828720042482e-06, "clip_ratio/region_mean": 0.001696002102107741, "epoch": 0.13290633440083066, "grad_norm": 0.4805845618247986, "learning_rate": 2e-07, "loss": 0.0644, "step": 1424 }, { "clip_ratio/high_max": 0.0024515897821402177, "clip_ratio/high_mean": 0.0009031936569954269, "clip_ratio/low_mean": 0.0009094547367567429, "clip_ratio/low_min": 1.952514867298305e-05, "clip_ratio/region_mean": 0.0018126483846572228, "epoch": 0.13299966750083125, "grad_norm": 4.029233932495117, "learning_rate": 2e-07, "loss": 0.0548, "step": 1425 }, { "clip_ratio/high_max": 0.0024525904373149388, "clip_ratio/high_mean": 0.0007474123303836677, "clip_ratio/low_mean": 0.0011082827040809207, "clip_ratio/low_min": 7.223650754895061e-05, "clip_ratio/region_mean": 0.0018556950381025672, "epoch": 0.13309300060083182, "grad_norm": 0.45861756801605225, "learning_rate": 2e-07, "loss": 0.0678, "step": 1426 }, { "clip_ratio/high_max": 0.0023402858423651196, "clip_ratio/high_mean": 0.0008091158251772868, "clip_ratio/low_mean": 0.0010660908810677938, "clip_ratio/low_min": 2.5805120458244346e-05, "clip_ratio/region_mean": 0.0018752067408058792, "epoch": 0.1331863337008324, "grad_norm": 0.40048226714134216, "learning_rate": 2e-07, "loss": 0.0239, "step": 1427 }, { "clip_ratio/high_max": 0.0024079250961221987, "clip_ratio/high_mean": 0.0008682491725267028, "clip_ratio/low_mean": 0.0010387228558101924, "clip_ratio/low_min": 2.9490405722754076e-05, "clip_ratio/region_mean": 0.0019069720001425594, "epoch": 0.133279666800833, "grad_norm": 0.5608670711517334, "learning_rate": 2e-07, "loss": 0.0631, "step": 1428 }, { "clip_ratio/high_max": 0.0025148778877337463, "clip_ratio/high_mean": 0.0008417230410486809, "clip_ratio/low_mean": 0.0009324784259661101, "clip_ratio/low_min": 2.0424837202881463e-05, "clip_ratio/region_mean": 0.0017742014315444976, "epoch": 0.1333729999008336, "grad_norm": 0.46958687901496887, "learning_rate": 2e-07, "loss": 0.011, "step": 1429 }, { "clip_ratio/high_max": 0.0017079925928555895, "clip_ratio/high_mean": 0.0006412028642444056, "clip_ratio/low_mean": 0.0012678189450525679, "clip_ratio/low_min": 2.8621583624044433e-05, "clip_ratio/region_mean": 0.0019090218338533305, "epoch": 0.13346633300083416, "grad_norm": 0.6317129135131836, "learning_rate": 2e-07, "loss": 0.0553, "step": 1430 }, { "clip_ratio/high_max": 0.0023303997877519578, "clip_ratio/high_mean": 0.0007982043862284627, "clip_ratio/low_mean": 0.0012996413061046042, "clip_ratio/low_min": 7.60751536290627e-05, "clip_ratio/region_mean": 0.002097845674143173, "epoch": 0.13355966610083475, "grad_norm": 1.3061832189559937, "learning_rate": 2e-07, "loss": 0.0499, "step": 1431 }, { "clip_ratio/high_max": 0.002532239435822703, "clip_ratio/high_mean": 0.0008975334567367099, "clip_ratio/low_mean": 0.0013155305150576169, "clip_ratio/low_min": 4.970132613379974e-05, "clip_ratio/region_mean": 0.0022130638972157612, "epoch": 0.13365299920083534, "grad_norm": 0.4787406325340271, "learning_rate": 2e-07, "loss": 0.0743, "step": 1432 }, { "clip_ratio/high_max": 0.002342156421946129, "clip_ratio/high_mean": 0.000860677583659708, "clip_ratio/low_mean": 0.00127108288825184, "clip_ratio/low_min": 2.7814924578706268e-05, "clip_ratio/region_mean": 0.002131760469637811, "epoch": 0.1337463323008359, "grad_norm": 0.664539635181427, "learning_rate": 2e-07, "loss": 0.0906, "step": 1433 }, { "clip_ratio/high_max": 0.002125769926351495, "clip_ratio/high_mean": 0.0007431493231706554, "clip_ratio/low_mean": 0.0011908952383237192, "clip_ratio/low_min": 7.470491254935041e-05, "clip_ratio/region_mean": 0.001934044594236184, "epoch": 0.1338396654008365, "grad_norm": 1.4063613414764404, "learning_rate": 2e-07, "loss": 0.0291, "step": 1434 }, { "clip_ratio/high_max": 0.0024910152569646016, "clip_ratio/high_mean": 0.0009417605542694218, "clip_ratio/low_mean": 0.0017236646863239002, "clip_ratio/low_min": 7.579754674225114e-05, "clip_ratio/region_mean": 0.002665425265149679, "epoch": 0.1339329985008371, "grad_norm": 0.8937907814979553, "learning_rate": 2e-07, "loss": 0.0184, "step": 1435 }, { "clip_ratio/high_max": 0.003027369653864298, "clip_ratio/high_mean": 0.0009681766714493278, "clip_ratio/low_mean": 0.00157960296201054, "clip_ratio/low_min": 9.461288573220372e-05, "clip_ratio/region_mean": 0.0025477796370978467, "epoch": 0.13402633160083766, "grad_norm": 1.5080628395080566, "learning_rate": 2e-07, "loss": 0.042, "step": 1436 }, { "clip_ratio/high_max": 0.0023450885964848567, "clip_ratio/high_mean": 0.000836309934129531, "clip_ratio/low_mean": 0.0016264475343632512, "clip_ratio/low_min": 0.0001718180501484312, "clip_ratio/region_mean": 0.00246275746030733, "epoch": 0.13411966470083825, "grad_norm": 0.48777467012405396, "learning_rate": 2e-07, "loss": 0.0701, "step": 1437 }, { "clip_ratio/high_max": 0.002487173638655804, "clip_ratio/high_mean": 0.0009364368706883397, "clip_ratio/low_mean": 0.001603741169674322, "clip_ratio/low_min": 0.00014974144778534537, "clip_ratio/region_mean": 0.0025401780658285134, "epoch": 0.13421299780083884, "grad_norm": 0.5488957762718201, "learning_rate": 2e-07, "loss": 0.0186, "step": 1438 }, { "clip_ratio/high_max": 0.0025934384466381744, "clip_ratio/high_mean": 0.0009238202983397059, "clip_ratio/low_mean": 0.0015067408348841127, "clip_ratio/low_min": 0.00013340966506802943, "clip_ratio/region_mean": 0.00243056112958584, "epoch": 0.1343063309008394, "grad_norm": 0.5224533677101135, "learning_rate": 2e-07, "loss": 0.042, "step": 1439 }, { "clip_ratio/high_max": 0.002193598715166445, "clip_ratio/high_mean": 0.0008002889835552196, "clip_ratio/low_mean": 0.0017514475293864962, "clip_ratio/low_min": 0.00012452206829038914, "clip_ratio/region_mean": 0.0025517364774714224, "epoch": 0.13439966400084, "grad_norm": 0.674601137638092, "learning_rate": 2e-07, "loss": 0.1045, "step": 1440 }, { "clip_ratio/high_max": 0.0026730866029538447, "clip_ratio/high_mean": 0.0008861470303145325, "clip_ratio/low_mean": 0.0016485023661516607, "clip_ratio/low_min": 6.800128721806686e-05, "clip_ratio/region_mean": 0.0025346493421238847, "epoch": 0.1344929971008406, "grad_norm": 0.7866846323013306, "learning_rate": 2e-07, "loss": 0.0488, "step": 1441 }, { "clip_ratio/high_max": 0.0025691538994578877, "clip_ratio/high_mean": 0.0009108567719522398, "clip_ratio/low_mean": 0.001813813349144766, "clip_ratio/low_min": 0.0001307161401200574, "clip_ratio/region_mean": 0.002724670135648921, "epoch": 0.13458633020084115, "grad_norm": 2.7574350833892822, "learning_rate": 2e-07, "loss": 0.0418, "step": 1442 }, { "clip_ratio/high_max": 0.0019525459065334871, "clip_ratio/high_mean": 0.0006550313191837631, "clip_ratio/low_mean": 0.0016372677018807735, "clip_ratio/low_min": 0.00013123841017659288, "clip_ratio/region_mean": 0.002292298995598685, "epoch": 0.13467966330084175, "grad_norm": 0.6903833746910095, "learning_rate": 2e-07, "loss": 0.1173, "step": 1443 }, { "clip_ratio/high_max": 0.0023495137538702693, "clip_ratio/high_mean": 0.0009579425368428929, "clip_ratio/low_mean": 0.0022618568400503136, "clip_ratio/low_min": 0.00027113845862913877, "clip_ratio/region_mean": 0.003219799262296874, "epoch": 0.13477299640084234, "grad_norm": 0.7423739433288574, "learning_rate": 2e-07, "loss": 0.0409, "step": 1444 }, { "clip_ratio/high_max": 0.0029621027715620585, "clip_ratio/high_mean": 0.0011092934610132943, "clip_ratio/low_mean": 0.0021768877595604863, "clip_ratio/low_min": 0.00011650138276309008, "clip_ratio/region_mean": 0.0032861812724149786, "epoch": 0.1348663295008429, "grad_norm": 0.5768318772315979, "learning_rate": 2e-07, "loss": 0.0201, "step": 1445 }, { "clip_ratio/high_max": 0.0034801488909579348, "clip_ratio/high_mean": 0.001034025319313514, "clip_ratio/low_mean": 0.0020276021750760265, "clip_ratio/low_min": 0.0001441427430108888, "clip_ratio/region_mean": 0.003061627510760445, "epoch": 0.1349596626008435, "grad_norm": 0.8390409350395203, "learning_rate": 2e-07, "loss": 0.056, "step": 1446 }, { "clip_ratio/high_max": 0.003055284352740273, "clip_ratio/high_mean": 0.0011373850738891633, "clip_ratio/low_mean": 0.0024477444458170794, "clip_ratio/low_min": 8.887905823939946e-05, "clip_ratio/region_mean": 0.0035851295760949142, "epoch": 0.1350529957008441, "grad_norm": 0.790241003036499, "learning_rate": 2e-07, "loss": 0.0263, "step": 1447 }, { "clip_ratio/high_max": 0.002895025405450724, "clip_ratio/high_mean": 0.001217231168084254, "clip_ratio/low_mean": 0.002294643269124208, "clip_ratio/low_min": 4.638218888430856e-05, "clip_ratio/region_mean": 0.0035118744053761475, "epoch": 0.13514632880084468, "grad_norm": 1.6624170541763306, "learning_rate": 2e-07, "loss": 0.0167, "step": 1448 }, { "clip_ratio/high_max": 0.003560240555088967, "clip_ratio/high_mean": 0.0011220917986065615, "clip_ratio/low_mean": 0.0022375651642505545, "clip_ratio/low_min": 0.00015427527978317812, "clip_ratio/region_mean": 0.0033596569555811584, "epoch": 0.13523966190084524, "grad_norm": 1.019584059715271, "learning_rate": 2e-07, "loss": 0.0959, "step": 1449 }, { "clip_ratio/high_max": 0.0035204305677325465, "clip_ratio/high_mean": 0.001129406800828292, "clip_ratio/low_mean": 0.0025988498418882955, "clip_ratio/low_min": 8.632439858047292e-05, "clip_ratio/region_mean": 0.0037282566918293014, "epoch": 0.13533299500084583, "grad_norm": 0.9907898902893066, "learning_rate": 2e-07, "loss": 0.0109, "step": 1450 }, { "clip_ratio/high_max": 0.0029726437678618822, "clip_ratio/high_mean": 0.001083929925925986, "clip_ratio/low_mean": 0.002887091693992261, "clip_ratio/low_min": 0.00014147655747365206, "clip_ratio/region_mean": 0.003971021702454891, "epoch": 0.13542632810084643, "grad_norm": 0.8106179237365723, "learning_rate": 2e-07, "loss": 0.0764, "step": 1451 }, { "clip_ratio/high_max": 0.0033718745289661456, "clip_ratio/high_mean": 0.0010835926186700817, "clip_ratio/low_mean": 0.0025226208599633537, "clip_ratio/low_min": 0.00017530148215882946, "clip_ratio/region_mean": 0.0036062135113752447, "epoch": 0.135519661200847, "grad_norm": 1.337110996246338, "learning_rate": 2e-07, "loss": 0.0647, "step": 1452 }, { "clip_ratio/high_max": 0.003224976757337572, "clip_ratio/high_mean": 0.001138706558776903, "clip_ratio/low_mean": 0.0030269080234575085, "clip_ratio/low_min": 0.0001649675286898855, "clip_ratio/region_mean": 0.004165614605881274, "epoch": 0.13561299430084758, "grad_norm": 0.8208521008491516, "learning_rate": 2e-07, "loss": 0.0281, "step": 1453 }, { "clip_ratio/high_max": 0.0034363482482149266, "clip_ratio/high_mean": 0.0012834551016567275, "clip_ratio/low_mean": 0.0031190614063234534, "clip_ratio/low_min": 0.0002975349416374229, "clip_ratio/region_mean": 0.004402516431582626, "epoch": 0.13570632740084818, "grad_norm": 1.2432661056518555, "learning_rate": 2e-07, "loss": 0.0953, "step": 1454 }, { "clip_ratio/high_max": 0.0032899245888984296, "clip_ratio/high_mean": 0.0011471292832538893, "clip_ratio/low_mean": 0.0031637828651582822, "clip_ratio/low_min": 0.0003822116159426514, "clip_ratio/region_mean": 0.004310912030632608, "epoch": 0.13579966050084874, "grad_norm": 2.820694923400879, "learning_rate": 2e-07, "loss": 0.0869, "step": 1455 }, { "clip_ratio/high_max": 0.003551418281858787, "clip_ratio/high_mean": 0.0013596752687590197, "clip_ratio/low_mean": 0.003230344478652114, "clip_ratio/low_min": 0.00015057870041346177, "clip_ratio/region_mean": 0.004590019838360604, "epoch": 0.13589299360084933, "grad_norm": 0.969578206539154, "learning_rate": 2e-07, "loss": 0.0679, "step": 1456 }, { "clip_ratio/high_max": 0.0036511849248199724, "clip_ratio/high_mean": 0.0013421898183878511, "clip_ratio/low_mean": 0.0030917273106751963, "clip_ratio/low_min": 5.652698382618837e-05, "clip_ratio/region_mean": 0.004433917158166878, "epoch": 0.13598632670084992, "grad_norm": 1.054965853691101, "learning_rate": 2e-07, "loss": 0.0287, "step": 1457 }, { "clip_ratio/high_max": 0.0032520052263862453, "clip_ratio/high_mean": 0.0011596130243560765, "clip_ratio/low_mean": 0.0031603337192791514, "clip_ratio/low_min": 0.00018933411593025085, "clip_ratio/region_mean": 0.00431994679820491, "epoch": 0.1360796598008505, "grad_norm": 1.4219534397125244, "learning_rate": 2e-07, "loss": 0.0943, "step": 1458 }, { "clip_ratio/high_max": 0.00430202076677233, "clip_ratio/high_mean": 0.0016392757897847332, "clip_ratio/low_mean": 0.0036003288405481726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.005239604593953118, "epoch": 0.13617299290085108, "grad_norm": 43.294593811035156, "learning_rate": 2e-07, "loss": 0.0336, "step": 1459 }, { "clip_ratio/high_max": 0.0035026593250222504, "clip_ratio/high_mean": 0.0013563765496655833, "clip_ratio/low_mean": 0.0033182903280248865, "clip_ratio/low_min": 0.00013439610575005645, "clip_ratio/region_mean": 0.004674666924984194, "epoch": 0.13626632600085167, "grad_norm": 1.0633057355880737, "learning_rate": 2e-07, "loss": 0.0427, "step": 1460 }, { "clip_ratio/high_max": 0.0044785700883949175, "clip_ratio/high_mean": 0.0015623941544617992, "clip_ratio/low_mean": 0.004423938582476694, "clip_ratio/low_min": 0.000145254594826838, "clip_ratio/region_mean": 0.005986332820612006, "epoch": 0.13635965910085224, "grad_norm": 2.145371913909912, "learning_rate": 2e-07, "loss": 0.0382, "step": 1461 }, { "clip_ratio/high_max": 0.004140974429901689, "clip_ratio/high_mean": 0.0015549366071354598, "clip_ratio/low_mean": 0.004191668325802311, "clip_ratio/low_min": 8.336111932294443e-05, "clip_ratio/region_mean": 0.005746605005697347, "epoch": 0.13645299220085283, "grad_norm": 2.1271820068359375, "learning_rate": 2e-07, "loss": 0.0528, "step": 1462 }, { "clip_ratio/high_max": 0.003508572459395509, "clip_ratio/high_mean": 0.0014288541897258256, "clip_ratio/low_mean": 0.004070622875588015, "clip_ratio/low_min": 0.00017038987243722659, "clip_ratio/region_mean": 0.005499477119883522, "epoch": 0.13654632530085342, "grad_norm": 1.6256355047225952, "learning_rate": 2e-07, "loss": 0.062, "step": 1463 }, { "clip_ratio/high_max": 0.003687793247081572, "clip_ratio/high_mean": 0.0012284385920793284, "clip_ratio/low_mean": 0.004748032690258697, "clip_ratio/low_min": 0.00045663763739867136, "clip_ratio/region_mean": 0.0059764712932519615, "epoch": 0.13663965840085399, "grad_norm": 12.150602340698242, "learning_rate": 2e-07, "loss": 0.0869, "step": 1464 }, { "clip_ratio/high_max": 0.004284890856069978, "clip_ratio/high_mean": 0.00160679801410879, "clip_ratio/low_mean": 0.004744461111840792, "clip_ratio/low_min": 0.0005952937062829733, "clip_ratio/region_mean": 0.0063512590713799, "epoch": 0.13673299150085458, "grad_norm": 3.128418445587158, "learning_rate": 2e-07, "loss": 0.1109, "step": 1465 }, { "clip_ratio/high_max": 0.0039977570122573525, "clip_ratio/high_mean": 0.0013557869588112226, "clip_ratio/low_mean": 0.004711387358838692, "clip_ratio/low_min": 0.0007294126799024525, "clip_ratio/region_mean": 0.006067174443160184, "epoch": 0.13682632460085517, "grad_norm": 1.4236468076705933, "learning_rate": 2e-07, "loss": 0.083, "step": 1466 }, { "clip_ratio/high_max": 0.0042467395614949055, "clip_ratio/high_mean": 0.0016202096594497561, "clip_ratio/low_mean": 0.004201971125439741, "clip_ratio/low_min": 0.00028734744773828425, "clip_ratio/region_mean": 0.00582218095951248, "epoch": 0.13691965770085573, "grad_norm": 1095.0185546875, "learning_rate": 2e-07, "loss": 0.1467, "step": 1467 }, { "clip_ratio/high_max": 0.004691912108683027, "clip_ratio/high_mean": 0.0016341140872100368, "clip_ratio/low_mean": 0.005747154253185727, "clip_ratio/low_min": 0.0005838838196723373, "clip_ratio/region_mean": 0.007381268369499594, "epoch": 0.13701299080085633, "grad_norm": 1.6314975023269653, "learning_rate": 2e-07, "loss": 0.045, "step": 1468 }, { "clip_ratio/high_max": 0.0041639123010099865, "clip_ratio/high_mean": 0.0014945414804969914, "clip_ratio/low_mean": 0.005325760808773339, "clip_ratio/low_min": 0.00014301269766292535, "clip_ratio/region_mean": 0.006820301932748407, "epoch": 0.13710632390085692, "grad_norm": 1.2559224367141724, "learning_rate": 2e-07, "loss": 0.0375, "step": 1469 }, { "clip_ratio/high_max": 0.004616990147042088, "clip_ratio/high_mean": 0.0019184701377525926, "clip_ratio/low_mean": 0.005328517538146116, "clip_ratio/low_min": 0.0006260307491174899, "clip_ratio/region_mean": 0.007246987632242963, "epoch": 0.1371996570008575, "grad_norm": 1.7895684242248535, "learning_rate": 2e-07, "loss": 0.0631, "step": 1470 }, { "clip_ratio/high_max": 0.004513877691351809, "clip_ratio/high_mean": 0.0018984657799592242, "clip_ratio/low_mean": 0.005441672969027422, "clip_ratio/low_min": 0.00042876379666267894, "clip_ratio/region_mean": 0.0073401387489866465, "epoch": 0.13729299010085808, "grad_norm": 1.7593392133712769, "learning_rate": 2e-07, "loss": 0.0354, "step": 1471 }, { "clip_ratio/high_max": 0.004719357923022471, "clip_ratio/high_mean": 0.001891336745757144, "clip_ratio/low_mean": 0.005200485480600037, "clip_ratio/low_min": 0.00014725681103300303, "clip_ratio/region_mean": 0.007091822393704206, "epoch": 0.13738632320085867, "grad_norm": 3.064988613128662, "learning_rate": 2e-07, "loss": 0.034, "step": 1472 }, { "clip_ratio/high_max": 0.006331202792352997, "clip_ratio/high_mean": 0.002299361272889655, "clip_ratio/low_mean": 0.005316790717188269, "clip_ratio/low_min": 0.00024288724671350792, "clip_ratio/region_mean": 0.007616152142873034, "epoch": 0.13747965630085926, "grad_norm": 8.548855781555176, "learning_rate": 2e-07, "loss": 0.0353, "step": 1473 }, { "clip_ratio/high_max": 0.005207854323089123, "clip_ratio/high_mean": 0.002193677566538099, "clip_ratio/low_mean": 0.005998676788294688, "clip_ratio/low_min": 0.00036002738488605246, "clip_ratio/region_mean": 0.00819235443486832, "epoch": 0.13757298940085982, "grad_norm": 3.0118956565856934, "learning_rate": 2e-07, "loss": 0.046, "step": 1474 }, { "clip_ratio/high_max": 0.006218437614734285, "clip_ratio/high_mean": 0.0022107767945271917, "clip_ratio/low_mean": 0.0058808091198443435, "clip_ratio/low_min": 0.00024875648523448035, "clip_ratio/region_mean": 0.008091586045338772, "epoch": 0.13766632250086042, "grad_norm": 2.0009605884552, "learning_rate": 2e-07, "loss": 0.0484, "step": 1475 }, { "clip_ratio/high_max": 0.00552432682889048, "clip_ratio/high_mean": 0.0019232859303883743, "clip_ratio/low_mean": 0.006324873960693367, "clip_ratio/low_min": 0.00018492533854441717, "clip_ratio/region_mean": 0.008248159661889076, "epoch": 0.137759655600861, "grad_norm": 33.7970085144043, "learning_rate": 2e-07, "loss": 0.0463, "step": 1476 }, { "clip_ratio/high_max": 0.005923492892179638, "clip_ratio/high_mean": 0.002552726731664734, "clip_ratio/low_mean": 0.00571142508124467, "clip_ratio/low_min": 0.00025221707619493827, "clip_ratio/region_mean": 0.008264151721959934, "epoch": 0.13785298870086157, "grad_norm": 2.7130908966064453, "learning_rate": 2e-07, "loss": 0.0527, "step": 1477 }, { "clip_ratio/high_max": 0.005575213326665107, "clip_ratio/high_mean": 0.001979000269784592, "clip_ratio/low_mean": 0.006446830491768196, "clip_ratio/low_min": 0.0006341262560454197, "clip_ratio/region_mean": 0.008425830717897043, "epoch": 0.13794632180086216, "grad_norm": 3.3413078784942627, "learning_rate": 2e-07, "loss": 0.0913, "step": 1478 }, { "clip_ratio/high_max": 0.005666428143740632, "clip_ratio/high_mean": 0.0018616802190081216, "clip_ratio/low_mean": 0.006947968955500983, "clip_ratio/low_min": 0.0002819410183292348, "clip_ratio/region_mean": 0.008809649152681231, "epoch": 0.13803965490086276, "grad_norm": 1.8736939430236816, "learning_rate": 2e-07, "loss": 0.0835, "step": 1479 }, { "clip_ratio/high_max": 0.00663354728021659, "clip_ratio/high_mean": 0.0026273501680407207, "clip_ratio/low_mean": 0.006056972619262524, "clip_ratio/low_min": 0.0004918819286103826, "clip_ratio/region_mean": 0.008684322820045054, "epoch": 0.13813298800086332, "grad_norm": 23.508838653564453, "learning_rate": 2e-07, "loss": 0.0536, "step": 1480 }, { "clip_ratio/high_max": 0.006675719050690532, "clip_ratio/high_mean": 0.002241718721052166, "clip_ratio/low_mean": 0.006085971981519833, "clip_ratio/low_min": 0.0002589657742646523, "clip_ratio/region_mean": 0.008327690593432635, "epoch": 0.1382263211008639, "grad_norm": 1.6455236673355103, "learning_rate": 2e-07, "loss": 0.0328, "step": 1481 }, { "clip_ratio/high_max": 0.006918589846463874, "clip_ratio/high_mean": 0.0026504000161367003, "clip_ratio/low_mean": 0.007400747010251507, "clip_ratio/low_min": 0.0013944573365733959, "clip_ratio/region_mean": 0.010051146891782992, "epoch": 0.1383196542008645, "grad_norm": 2.862809419631958, "learning_rate": 2e-07, "loss": 0.072, "step": 1482 }, { "clip_ratio/high_max": 0.0061363898566924036, "clip_ratio/high_mean": 0.002340374187042471, "clip_ratio/low_mean": 0.006821267583291046, "clip_ratio/low_min": 0.0005191047603148036, "clip_ratio/region_mean": 0.00916164173395373, "epoch": 0.13841298730086507, "grad_norm": 6.401864528656006, "learning_rate": 2e-07, "loss": 0.0583, "step": 1483 }, { "clip_ratio/high_max": 0.005868579653906636, "clip_ratio/high_mean": 0.0024353633052669466, "clip_ratio/low_mean": 0.007369162238319404, "clip_ratio/low_min": 0.0007636391774212825, "clip_ratio/region_mean": 0.009804525645449758, "epoch": 0.13850632040086566, "grad_norm": 2.3226635456085205, "learning_rate": 2e-07, "loss": 0.0474, "step": 1484 }, { "clip_ratio/high_max": 0.0065404388442402706, "clip_ratio/high_mean": 0.002226689553936012, "clip_ratio/low_mean": 0.006948075955733657, "clip_ratio/low_min": 0.0003735497812158428, "clip_ratio/region_mean": 0.009174765640636906, "epoch": 0.13859965350086625, "grad_norm": 24.28095054626465, "learning_rate": 2e-07, "loss": 0.0374, "step": 1485 }, { "clip_ratio/high_max": 0.007796584439347498, "clip_ratio/high_mean": 0.0027821751791634597, "clip_ratio/low_mean": 0.008453633199678734, "clip_ratio/low_min": 0.00038704232429154217, "clip_ratio/region_mean": 0.011235808546189219, "epoch": 0.13869298660086682, "grad_norm": 4.270010471343994, "learning_rate": 2e-07, "loss": 0.0548, "step": 1486 }, { "clip_ratio/high_max": 0.006832428654888645, "clip_ratio/high_mean": 0.002934200681920629, "clip_ratio/low_mean": 0.007350774394581094, "clip_ratio/low_min": 0.0008920675900299102, "clip_ratio/region_mean": 0.010284974938258529, "epoch": 0.1387863197008674, "grad_norm": 22.819581985473633, "learning_rate": 2e-07, "loss": 0.0367, "step": 1487 }, { "clip_ratio/high_max": 0.00845759900403209, "clip_ratio/high_mean": 0.0031235256501531694, "clip_ratio/low_mean": 0.005614174922811799, "clip_ratio/low_min": 0.000335266646288801, "clip_ratio/region_mean": 0.008737700496567413, "epoch": 0.138879652800868, "grad_norm": 3.468803882598877, "learning_rate": 2e-07, "loss": 0.0248, "step": 1488 }, { "clip_ratio/high_max": 0.006896912178490311, "clip_ratio/high_mean": 0.002735484355071094, "clip_ratio/low_mean": 0.007968440011609346, "clip_ratio/low_min": 0.00043800368439406157, "clip_ratio/region_mean": 0.010703924344852567, "epoch": 0.13897298590086857, "grad_norm": 4.241065502166748, "learning_rate": 2e-07, "loss": 0.0701, "step": 1489 }, { "clip_ratio/high_max": 0.008602060668636113, "clip_ratio/high_mean": 0.0029603758521261625, "clip_ratio/low_mean": 0.007971873608767055, "clip_ratio/low_min": 0.0006832727885921486, "clip_ratio/region_mean": 0.010932249497273006, "epoch": 0.13906631900086916, "grad_norm": 5.218986988067627, "learning_rate": 2e-07, "loss": 0.0564, "step": 1490 }, { "clip_ratio/high_max": 0.0074678011442301795, "clip_ratio/high_mean": 0.003028563307452714, "clip_ratio/low_mean": 0.00805964546452742, "clip_ratio/low_min": 0.0005745011440012604, "clip_ratio/region_mean": 0.011088208586443216, "epoch": 0.13915965210086975, "grad_norm": 2.2884485721588135, "learning_rate": 2e-07, "loss": 0.0121, "step": 1491 }, { "clip_ratio/high_max": 0.0103855915076565, "clip_ratio/high_mean": 0.004188959836028516, "clip_ratio/low_mean": 0.006645009838393889, "clip_ratio/low_min": 0.001080509799066931, "clip_ratio/region_mean": 0.01083396963076666, "epoch": 0.13925298520087034, "grad_norm": 8.672272682189941, "learning_rate": 2e-07, "loss": 0.056, "step": 1492 }, { "clip_ratio/high_max": 0.008909284821129404, "clip_ratio/high_mean": 0.0031841247400734574, "clip_ratio/low_mean": 0.009349468600703403, "clip_ratio/low_min": 0.000671421985316556, "clip_ratio/region_mean": 0.01253359334077686, "epoch": 0.1393463183008709, "grad_norm": 4.608011722564697, "learning_rate": 2e-07, "loss": 0.0528, "step": 1493 }, { "clip_ratio/high_max": 0.010531258114497177, "clip_ratio/high_mean": 0.00379265307856258, "clip_ratio/low_mean": 0.007463000249117613, "clip_ratio/low_min": 0.0006122993363533169, "clip_ratio/region_mean": 0.011255653109401464, "epoch": 0.1394396514008715, "grad_norm": 6.114775657653809, "learning_rate": 2e-07, "loss": 0.0518, "step": 1494 }, { "clip_ratio/high_max": 0.007593485788675025, "clip_ratio/high_mean": 0.0027870056583196856, "clip_ratio/low_mean": 0.008355341502465308, "clip_ratio/low_min": 0.00039152655517682433, "clip_ratio/region_mean": 0.011142347299028188, "epoch": 0.1395329845008721, "grad_norm": 3.744758367538452, "learning_rate": 2e-07, "loss": 0.0659, "step": 1495 }, { "clip_ratio/high_max": 0.009614675014745444, "clip_ratio/high_mean": 0.0035036601730098482, "clip_ratio/low_mean": 0.0075600906275212765, "clip_ratio/low_min": 0.0006339566607493907, "clip_ratio/region_mean": 0.011063750804169104, "epoch": 0.13962631760087266, "grad_norm": 43.78133010864258, "learning_rate": 2e-07, "loss": 0.0662, "step": 1496 }, { "clip_ratio/high_max": 0.01048118996550329, "clip_ratio/high_mean": 0.0037377056141849607, "clip_ratio/low_mean": 0.009337985538877547, "clip_ratio/low_min": 0.0005732829886255786, "clip_ratio/region_mean": 0.013075691240373999, "epoch": 0.13971965070087325, "grad_norm": 10.085516929626465, "learning_rate": 2e-07, "loss": 0.0628, "step": 1497 }, { "clip_ratio/high_max": 0.010500032862182707, "clip_ratio/high_mean": 0.00373045325250132, "clip_ratio/low_mean": 0.009753571037435904, "clip_ratio/low_min": 0.0014065109280636534, "clip_ratio/region_mean": 0.013484024355420843, "epoch": 0.13981298380087384, "grad_norm": 9.888315200805664, "learning_rate": 2e-07, "loss": 0.0829, "step": 1498 }, { "clip_ratio/high_max": 0.00988983758725226, "clip_ratio/high_mean": 0.004135534349188674, "clip_ratio/low_mean": 0.008053027151618153, "clip_ratio/low_min": 0.0001524157851235941, "clip_ratio/region_mean": 0.012188561784569174, "epoch": 0.1399063169008744, "grad_norm": 3.000796318054199, "learning_rate": 2e-07, "loss": 0.0328, "step": 1499 }, { "clip_ratio/high_max": 0.010463965736562386, "clip_ratio/high_mean": 0.0040103903447743505, "clip_ratio/low_mean": 0.009397264599101618, "clip_ratio/low_min": 0.0007881463825469837, "clip_ratio/region_mean": 0.013407654885668308, "epoch": 0.139999650000875, "grad_norm": 5.209139347076416, "learning_rate": 2e-07, "loss": 0.0029, "step": 1500 }, { "clip_ratio/high_max": 0.008385888795601204, "clip_ratio/high_mean": 0.0036111978988628834, "clip_ratio/low_mean": 0.00833279881044291, "clip_ratio/low_min": 0.00047047092812135816, "clip_ratio/region_mean": 0.011943996825721115, "epoch": 0.1400929831008756, "grad_norm": 2.6238529682159424, "learning_rate": 2e-07, "loss": 0.0501, "step": 1501 }, { "clip_ratio/high_max": 0.00948066305136308, "clip_ratio/high_mean": 0.003758178456337191, "clip_ratio/low_mean": 0.008748326712520793, "clip_ratio/low_min": 0.0005665630960720591, "clip_ratio/region_mean": 0.01250650524161756, "epoch": 0.14018631620087615, "grad_norm": 6.539761543273926, "learning_rate": 2e-07, "loss": 0.0743, "step": 1502 }, { "clip_ratio/high_max": 0.010480839715455659, "clip_ratio/high_mean": 0.003898135262716096, "clip_ratio/low_mean": 0.010391399817308411, "clip_ratio/low_min": 0.0010799056763062254, "clip_ratio/region_mean": 0.014289535058196634, "epoch": 0.14027964930087675, "grad_norm": 6.7871012687683105, "learning_rate": 2e-07, "loss": 0.056, "step": 1503 }, { "clip_ratio/high_max": 0.011206004084669985, "clip_ratio/high_mean": 0.0042216691945213825, "clip_ratio/low_mean": 0.009597584343282506, "clip_ratio/low_min": 0.0016802061363705434, "clip_ratio/region_mean": 0.013819253770634532, "epoch": 0.14037298240087734, "grad_norm": 188.27972412109375, "learning_rate": 2e-07, "loss": 0.0669, "step": 1504 }, { "clip_ratio/high_max": 0.009206546877976507, "clip_ratio/high_mean": 0.0033995092890108936, "clip_ratio/low_mean": 0.010470787150552496, "clip_ratio/low_min": 0.0006538449379149824, "clip_ratio/region_mean": 0.013870296534150839, "epoch": 0.1404663155008779, "grad_norm": 4.114729404449463, "learning_rate": 2e-07, "loss": 0.0662, "step": 1505 }, { "clip_ratio/high_max": 0.009534433702356182, "clip_ratio/high_mean": 0.003739131920156069, "clip_ratio/low_mean": 0.01049463200615719, "clip_ratio/low_min": 0.0005600358708761632, "clip_ratio/region_mean": 0.014233763999072835, "epoch": 0.1405596486008785, "grad_norm": 7.1068549156188965, "learning_rate": 2e-07, "loss": 0.0813, "step": 1506 }, { "clip_ratio/high_max": 0.010350649943575263, "clip_ratio/high_mean": 0.0036709518535644747, "clip_ratio/low_mean": 0.0107872890366707, "clip_ratio/low_min": 0.0009237701160600409, "clip_ratio/region_mean": 0.014458240912063047, "epoch": 0.1406529817008791, "grad_norm": 4.928596019744873, "learning_rate": 2e-07, "loss": 0.0376, "step": 1507 }, { "clip_ratio/high_max": 0.011022994323866442, "clip_ratio/high_mean": 0.00394003411201993, "clip_ratio/low_mean": 0.011775011342251673, "clip_ratio/low_min": 0.0012715245684375986, "clip_ratio/region_mean": 0.01571504567982629, "epoch": 0.14074631480087965, "grad_norm": 10.586506843566895, "learning_rate": 2e-07, "loss": 0.079, "step": 1508 }, { "clip_ratio/high_max": 0.010411949915578589, "clip_ratio/high_mean": 0.004011575219919905, "clip_ratio/low_mean": 0.012277233676286414, "clip_ratio/low_min": 0.0010456988966325298, "clip_ratio/region_mean": 0.01628880901262164, "epoch": 0.14083964790088024, "grad_norm": 114.8277587890625, "learning_rate": 2e-07, "loss": 0.0728, "step": 1509 }, { "clip_ratio/high_max": 0.009721985727082938, "clip_ratio/high_mean": 0.00394196772685973, "clip_ratio/low_mean": 0.012693684577243403, "clip_ratio/low_min": 0.0009633996814955026, "clip_ratio/region_mean": 0.016635652544209734, "epoch": 0.14093298100088084, "grad_norm": 4.461319923400879, "learning_rate": 2e-07, "loss": 0.071, "step": 1510 }, { "clip_ratio/high_max": 0.010852181134396233, "clip_ratio/high_mean": 0.004392207058117492, "clip_ratio/low_mean": 0.008701858692802489, "clip_ratio/low_min": 0.00019897828315151855, "clip_ratio/region_mean": 0.013094065710902214, "epoch": 0.14102631410088143, "grad_norm": 4.938162803649902, "learning_rate": 2e-07, "loss": 0.0397, "step": 1511 }, { "clip_ratio/high_max": 0.009760048109455965, "clip_ratio/high_mean": 0.003953845791329513, "clip_ratio/low_mean": 0.01263499160995707, "clip_ratio/low_min": 0.001086536023649387, "clip_ratio/region_mean": 0.01658883731579408, "epoch": 0.141119647200882, "grad_norm": 14.981100082397461, "learning_rate": 2e-07, "loss": 0.063, "step": 1512 }, { "clip_ratio/high_max": 0.013405644509475678, "clip_ratio/high_mean": 0.004705114450189285, "clip_ratio/low_mean": 0.013370266067795455, "clip_ratio/low_min": 0.0012662416702369228, "clip_ratio/region_mean": 0.018075380648951977, "epoch": 0.14121298030088258, "grad_norm": 23.063703536987305, "learning_rate": 2e-07, "loss": 0.0978, "step": 1513 }, { "clip_ratio/high_max": 0.01309622050030157, "clip_ratio/high_mean": 0.004619080093107186, "clip_ratio/low_mean": 0.013665582868270576, "clip_ratio/low_min": 0.0014160636492306367, "clip_ratio/region_mean": 0.018284663325175643, "epoch": 0.14130631340088318, "grad_norm": 9.61037826538086, "learning_rate": 2e-07, "loss": 0.0636, "step": 1514 }, { "clip_ratio/high_max": 0.010971913812682033, "clip_ratio/high_mean": 0.004688748063927051, "clip_ratio/low_mean": 0.010022249130997807, "clip_ratio/low_min": 0.0015986244397936389, "clip_ratio/region_mean": 0.014710997260408476, "epoch": 0.14139964650088374, "grad_norm": 163.98851013183594, "learning_rate": 2e-07, "loss": 0.0328, "step": 1515 }, { "clip_ratio/high_max": 0.01266252383356914, "clip_ratio/high_mean": 0.004752950670081191, "clip_ratio/low_mean": 0.012311642232816666, "clip_ratio/low_min": 0.0008202619792427868, "clip_ratio/region_mean": 0.017064592888345942, "epoch": 0.14149297960088433, "grad_norm": 11.858915328979492, "learning_rate": 2e-07, "loss": 0.0785, "step": 1516 }, { "clip_ratio/high_max": 0.01245160264079459, "clip_ratio/high_mean": 0.004706357904069591, "clip_ratio/low_mean": 0.012294373140321113, "clip_ratio/low_min": 0.002134902752004564, "clip_ratio/region_mean": 0.017000731197185814, "epoch": 0.14158631270088493, "grad_norm": 8.54983139038086, "learning_rate": 2e-07, "loss": 0.0719, "step": 1517 }, { "clip_ratio/high_max": 0.009926143800839782, "clip_ratio/high_mean": 0.003591825268813409, "clip_ratio/low_mean": 0.01287001179298386, "clip_ratio/low_min": 0.00023505708668380976, "clip_ratio/region_mean": 0.016461836989037693, "epoch": 0.1416796458008855, "grad_norm": 42.633609771728516, "learning_rate": 2e-07, "loss": 0.097, "step": 1518 }, { "clip_ratio/high_max": 0.013024982617935166, "clip_ratio/high_mean": 0.004754815381602384, "clip_ratio/low_mean": 0.0128330857551191, "clip_ratio/low_min": 0.00032164011645363644, "clip_ratio/region_mean": 0.01758790109306574, "epoch": 0.14177297890088608, "grad_norm": 6.471938610076904, "learning_rate": 2e-07, "loss": 0.0448, "step": 1519 }, { "clip_ratio/high_max": 0.014478586497716606, "clip_ratio/high_mean": 0.0055814015795476735, "clip_ratio/low_mean": 0.012879426503786817, "clip_ratio/low_min": 0.001599843795702327, "clip_ratio/region_mean": 0.018460827763192356, "epoch": 0.14186631200088667, "grad_norm": 16.62783432006836, "learning_rate": 2e-07, "loss": 0.0601, "step": 1520 }, { "clip_ratio/high_max": 0.012882993323728442, "clip_ratio/high_mean": 0.004840794033952989, "clip_ratio/low_mean": 0.014288280683103949, "clip_ratio/low_min": 0.0013846508954884484, "clip_ratio/region_mean": 0.019129074295051396, "epoch": 0.14195964510088724, "grad_norm": 10.969073295593262, "learning_rate": 2e-07, "loss": 0.0709, "step": 1521 }, { "clip_ratio/high_max": 0.014034319610800594, "clip_ratio/high_mean": 0.005414353698142804, "clip_ratio/low_mean": 0.015940941084409133, "clip_ratio/low_min": 0.00034125147794838995, "clip_ratio/region_mean": 0.02135529473889619, "epoch": 0.14205297820088783, "grad_norm": 34.26850509643555, "learning_rate": 2e-07, "loss": 0.1027, "step": 1522 }, { "clip_ratio/high_max": 0.014943013578886166, "clip_ratio/high_mean": 0.005799706093966961, "clip_ratio/low_mean": 0.0159431979409419, "clip_ratio/low_min": 0.0019753236047108658, "clip_ratio/region_mean": 0.021742904267739505, "epoch": 0.14214631130088842, "grad_norm": 9.099319458007812, "learning_rate": 2e-07, "loss": 0.0793, "step": 1523 }, { "clip_ratio/high_max": 0.016448465175926685, "clip_ratio/high_mean": 0.005841576377861202, "clip_ratio/low_mean": 0.012623472095583566, "clip_ratio/low_min": 0.0012977936276001856, "clip_ratio/region_mean": 0.018465048517100513, "epoch": 0.142239644400889, "grad_norm": 15.552641868591309, "learning_rate": 2e-07, "loss": 0.0311, "step": 1524 }, { "clip_ratio/high_max": 0.012718146230326965, "clip_ratio/high_mean": 0.004639718637918122, "clip_ratio/low_mean": 0.013770155375823379, "clip_ratio/low_min": 0.0014278234739322215, "clip_ratio/region_mean": 0.01840987359173596, "epoch": 0.14233297750088958, "grad_norm": 71.0652084350586, "learning_rate": 2e-07, "loss": 0.1008, "step": 1525 }, { "clip_ratio/high_max": 0.013745399424806237, "clip_ratio/high_mean": 0.005304327511112206, "clip_ratio/low_mean": 0.013843146647559479, "clip_ratio/low_min": 0.0013580212034867145, "clip_ratio/region_mean": 0.01914747426053509, "epoch": 0.14242631060089017, "grad_norm": 3827.419677734375, "learning_rate": 2e-07, "loss": 0.2753, "step": 1526 }, { "clip_ratio/high_max": 0.01704944239463657, "clip_ratio/high_mean": 0.006721330850268714, "clip_ratio/low_mean": 0.013069258100586012, "clip_ratio/low_min": 0.0007864274375606328, "clip_ratio/region_mean": 0.01979058881988749, "epoch": 0.14251964370089074, "grad_norm": 7.572017192840576, "learning_rate": 2e-07, "loss": 0.0326, "step": 1527 }, { "clip_ratio/high_max": 0.016442522959550843, "clip_ratio/high_mean": 0.006326496739347931, "clip_ratio/low_mean": 0.01312543178210035, "clip_ratio/low_min": 0.0008171074950951152, "clip_ratio/region_mean": 0.019451929023489356, "epoch": 0.14261297680089133, "grad_norm": 8.53805160522461, "learning_rate": 2e-07, "loss": 0.0543, "step": 1528 }, { "clip_ratio/high_max": 0.014276863948907703, "clip_ratio/high_mean": 0.005592348927166313, "clip_ratio/low_mean": 0.01404675652156584, "clip_ratio/low_min": 0.0006839881825726479, "clip_ratio/region_mean": 0.019639105710666627, "epoch": 0.14270630990089192, "grad_norm": 37.78240966796875, "learning_rate": 2e-07, "loss": 0.1078, "step": 1529 }, { "clip_ratio/high_max": 0.015142606367589906, "clip_ratio/high_mean": 0.005922932890825905, "clip_ratio/low_mean": 0.015061702870298177, "clip_ratio/low_min": 0.0008201240125345066, "clip_ratio/region_mean": 0.020984635630156845, "epoch": 0.14279964300089248, "grad_norm": 22.309871673583984, "learning_rate": 2e-07, "loss": 0.0517, "step": 1530 }, { "clip_ratio/high_max": 0.01574518723646179, "clip_ratio/high_mean": 0.006336163976811804, "clip_ratio/low_mean": 0.016001800016965717, "clip_ratio/low_min": 0.0014258518058340997, "clip_ratio/region_mean": 0.02233796362997964, "epoch": 0.14289297610089308, "grad_norm": 52.65270233154297, "learning_rate": 2e-07, "loss": 0.09, "step": 1531 }, { "clip_ratio/high_max": 0.018542533740401268, "clip_ratio/high_mean": 0.007310303277336061, "clip_ratio/low_mean": 0.015105009370017797, "clip_ratio/low_min": 0.001293691952014342, "clip_ratio/region_mean": 0.02241531270556152, "epoch": 0.14298630920089367, "grad_norm": 10.39724349975586, "learning_rate": 2e-07, "loss": 0.0407, "step": 1532 }, { "clip_ratio/high_max": 0.020917464164085686, "clip_ratio/high_mean": 0.007530486167524941, "clip_ratio/low_mean": 0.014347026648465544, "clip_ratio/low_min": 0.0014942592242732644, "clip_ratio/region_mean": 0.021877513034269214, "epoch": 0.14307964230089426, "grad_norm": 20.651105880737305, "learning_rate": 2e-07, "loss": 0.0651, "step": 1533 }, { "clip_ratio/high_max": 0.01636326580774039, "clip_ratio/high_mean": 0.005423209979198873, "clip_ratio/low_mean": 0.018241409910842776, "clip_ratio/low_min": 0.002557979809353128, "clip_ratio/region_mean": 0.023664619715418667, "epoch": 0.14317297540089483, "grad_norm": 25.144960403442383, "learning_rate": 2e-07, "loss": 0.1248, "step": 1534 }, { "clip_ratio/high_max": 0.02010624815011397, "clip_ratio/high_mean": 0.007987363875145093, "clip_ratio/low_mean": 0.014099827560130507, "clip_ratio/low_min": 0.0022938479960430413, "clip_ratio/region_mean": 0.022087191173341125, "epoch": 0.14326630850089542, "grad_norm": 21.81289291381836, "learning_rate": 2e-07, "loss": 0.0541, "step": 1535 }, { "clip_ratio/high_max": 0.014984688896220177, "clip_ratio/high_mean": 0.006201296113431454, "clip_ratio/low_mean": 0.01496699801646173, "clip_ratio/low_min": 0.0006940177991054952, "clip_ratio/region_mean": 0.021168294362723827, "epoch": 0.143359641600896, "grad_norm": 97.85721588134766, "learning_rate": 2e-07, "loss": 0.1155, "step": 1536 }, { "clip_ratio/high_max": 0.0021196430916461395, "clip_ratio/high_mean": 0.0005031969892570487, "clip_ratio/low_mean": 0.0012564877797558438, "clip_ratio/low_min": 4.3615722461254336e-05, "clip_ratio/region_mean": 0.0017596847865206655, "completions/clipped_ratio": 0.1940220424107143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 990.591796875, "completions/mean_terminated_length": 243.0307159423828, "completions/min_length": 12.0, "completions/min_terminated_length": 12.0, "epoch": 0.14345297470089657, "grad_norm": 2.641310930252075, "learning_rate": 2e-07, "loss": 0.0913, "num_tokens": 1136645105.0, "reward": 0.0988159254193306, "reward_std": 0.11764324456453323, "rewards/simpleverify_reward/mean": 0.09881591796875, "rewards/simpleverify_reward/std": 0.29841604828834534, "step": 1537 }, { "clip_ratio/high_max": 0.0009248777614629944, "clip_ratio/high_mean": 0.0002494698062491807, "clip_ratio/low_mean": 0.00121991393098142, "clip_ratio/low_min": 1.1382410320948111e-05, "clip_ratio/region_mean": 0.0014693837365484796, "epoch": 0.14354630780089717, "grad_norm": 0.580058217048645, "learning_rate": 2e-07, "loss": 0.088, "step": 1538 }, { "clip_ratio/high_max": 0.0017186277618748136, "clip_ratio/high_mean": 0.00043266852026135894, "clip_ratio/low_mean": 0.001394681527017383, "clip_ratio/low_min": 1.6476559721922968e-05, "clip_ratio/region_mean": 0.0018273500245413743, "epoch": 0.14363964090089776, "grad_norm": 4.160432815551758, "learning_rate": 2e-07, "loss": 0.0175, "step": 1539 }, { "clip_ratio/high_max": 0.00290394180410658, "clip_ratio/high_mean": 0.0005832867718709167, "clip_ratio/low_mean": 0.00105823777448677, "clip_ratio/low_min": 6.402730514309951e-05, "clip_ratio/region_mean": 0.0016415245645475807, "epoch": 0.14373297400089832, "grad_norm": 3.574641466140747, "learning_rate": 2e-07, "loss": 0.0506, "step": 1540 }, { "clip_ratio/high_max": 0.0010591922155072098, "clip_ratio/high_mean": 0.00030550576627774717, "clip_ratio/low_mean": 0.0016089271903183544, "clip_ratio/low_min": 3.3851657462946605e-05, "clip_ratio/region_mean": 0.0019144328762195073, "epoch": 0.14382630710089891, "grad_norm": 0.391309916973114, "learning_rate": 2e-07, "loss": 0.0613, "step": 1541 }, { "clip_ratio/high_max": 0.00243121982748562, "clip_ratio/high_mean": 0.0005177284097044321, "clip_ratio/low_mean": 0.0016252015921054408, "clip_ratio/low_min": 6.0420709814934526e-05, "clip_ratio/region_mean": 0.0021429300359159242, "epoch": 0.1439196402008995, "grad_norm": 2.6499602794647217, "learning_rate": 2e-07, "loss": 0.0113, "step": 1542 }, { "clip_ratio/high_max": 0.0007740434894003556, "clip_ratio/high_mean": 0.00018782196775646298, "clip_ratio/low_mean": 0.0014512754605675582, "clip_ratio/low_min": 2.97895689982397e-05, "clip_ratio/region_mean": 0.0016390974051319063, "epoch": 0.14401297330090007, "grad_norm": 1.3003000020980835, "learning_rate": 2e-07, "loss": 0.0748, "step": 1543 }, { "clip_ratio/high_max": 0.002311426052983734, "clip_ratio/high_mean": 0.00047950890302672633, "clip_ratio/low_mean": 0.0021022655546403257, "clip_ratio/low_min": 6.01604579060222e-05, "clip_ratio/region_mean": 0.0025817744281084742, "epoch": 0.14410630640090066, "grad_norm": 1.2963966131210327, "learning_rate": 2e-07, "loss": 0.0908, "step": 1544 }, { "clip_ratio/high_max": 0.0028650009371631313, "clip_ratio/high_mean": 0.0006087638957978925, "clip_ratio/low_mean": 0.001814946677768603, "clip_ratio/low_min": 7.642771197424736e-05, "clip_ratio/region_mean": 0.0024237105972133577, "epoch": 0.14419963950090126, "grad_norm": 3.2933709621429443, "learning_rate": 2e-07, "loss": 0.0244, "step": 1545 }, { "clip_ratio/high_max": 0.0018264889531565132, "clip_ratio/high_mean": 0.00040690698256184987, "clip_ratio/low_mean": 0.0026043957577712717, "clip_ratio/low_min": 0.00011290715337963775, "clip_ratio/region_mean": 0.0030113027460174635, "epoch": 0.14429297260090182, "grad_norm": 0.820268452167511, "learning_rate": 2e-07, "loss": 0.0889, "step": 1546 }, { "clip_ratio/high_max": 0.0014638961747550638, "clip_ratio/high_mean": 0.00040067920463116025, "clip_ratio/low_mean": 0.0017244839546037838, "clip_ratio/low_min": 1.8721270862442907e-05, "clip_ratio/region_mean": 0.0021251631660561543, "epoch": 0.1443863057009024, "grad_norm": 1.1142103672027588, "learning_rate": 2e-07, "loss": 0.0861, "step": 1547 }, { "clip_ratio/high_max": 0.0038958541372267064, "clip_ratio/high_mean": 0.0008711912500984909, "clip_ratio/low_mean": 0.0023179607960628346, "clip_ratio/low_min": 9.874154238787014e-05, "clip_ratio/region_mean": 0.003189151975675486, "epoch": 0.144479638800903, "grad_norm": 1.3785908222198486, "learning_rate": 2e-07, "loss": 0.002, "step": 1548 }, { "clip_ratio/high_max": 0.0017569900865055388, "clip_ratio/high_mean": 0.00038262602902250364, "clip_ratio/low_mean": 0.002074264280963689, "clip_ratio/low_min": 7.234633812913671e-05, "clip_ratio/region_mean": 0.002456890342728002, "epoch": 0.14457297190090357, "grad_norm": 1.6213676929473877, "learning_rate": 2e-07, "loss": 0.0605, "step": 1549 }, { "clip_ratio/high_max": 0.0017009091116051422, "clip_ratio/high_mean": 0.0003973875591327669, "clip_ratio/low_mean": 0.002339148621103959, "clip_ratio/low_min": 8.462114055873826e-05, "clip_ratio/region_mean": 0.002736536247539334, "epoch": 0.14466630500090416, "grad_norm": 2.787680149078369, "learning_rate": 2e-07, "loss": 0.1072, "step": 1550 }, { "clip_ratio/high_max": 0.0023180230091384146, "clip_ratio/high_mean": 0.0005066318562967354, "clip_ratio/low_mean": 0.0022704703587805852, "clip_ratio/low_min": 0.00015892461487965193, "clip_ratio/region_mean": 0.0027771022068918683, "epoch": 0.14475963810090475, "grad_norm": 1.9090454578399658, "learning_rate": 2e-07, "loss": 0.055, "step": 1551 }, { "clip_ratio/high_max": 0.0015037819921417395, "clip_ratio/high_mean": 0.0004018538354557677, "clip_ratio/low_mean": 0.0022508582078444306, "clip_ratio/low_min": 0.00012146332301199436, "clip_ratio/region_mean": 0.0026527121081016958, "epoch": 0.14485297120090532, "grad_norm": 3.303067207336426, "learning_rate": 2e-07, "loss": 0.0416, "step": 1552 }, { "clip_ratio/high_max": 0.0017093855171879113, "clip_ratio/high_mean": 0.00047568033909328733, "clip_ratio/low_mean": 0.002399211627562181, "clip_ratio/low_min": 8.037542920646956e-05, "clip_ratio/region_mean": 0.00287489188121981, "epoch": 0.1449463043009059, "grad_norm": 2.86083722114563, "learning_rate": 2e-07, "loss": 0.0625, "step": 1553 }, { "clip_ratio/high_max": 0.0024553460789320525, "clip_ratio/high_mean": 0.0005836718728460255, "clip_ratio/low_mean": 0.0026192884070042055, "clip_ratio/low_min": 6.0087362726335414e-05, "clip_ratio/region_mean": 0.0032029602843977045, "epoch": 0.1450396374009065, "grad_norm": 35.77680969238281, "learning_rate": 2e-07, "loss": 0.0955, "step": 1554 }, { "clip_ratio/high_max": 0.0029813846631441265, "clip_ratio/high_mean": 0.0007253582509747503, "clip_ratio/low_mean": 0.0024449789088976104, "clip_ratio/low_min": 0.00010927723269560374, "clip_ratio/region_mean": 0.0031703371278126724, "epoch": 0.1451329705009071, "grad_norm": 1.7170997858047485, "learning_rate": 2e-07, "loss": 0.0706, "step": 1555 }, { "clip_ratio/high_max": 0.001839655533331097, "clip_ratio/high_mean": 0.00046594555465162557, "clip_ratio/low_mean": 0.0028772918485628907, "clip_ratio/low_min": 0.00020019268504256615, "clip_ratio/region_mean": 0.0033432373202231247, "epoch": 0.14522630360090766, "grad_norm": 99.9493637084961, "learning_rate": 2e-07, "loss": 0.0899, "step": 1556 }, { "clip_ratio/high_max": 0.0028191102592245443, "clip_ratio/high_mean": 0.0006298077287283377, "clip_ratio/low_mean": 0.0025342290027765557, "clip_ratio/low_min": 0.0001354914406874741, "clip_ratio/region_mean": 0.0031640366723877378, "epoch": 0.14531963670090825, "grad_norm": 1.6621710062026978, "learning_rate": 2e-07, "loss": 0.0567, "step": 1557 }, { "clip_ratio/high_max": 0.0031349534160654002, "clip_ratio/high_mean": 0.0006469959677133374, "clip_ratio/low_mean": 0.0025516779605823103, "clip_ratio/low_min": 0.00010410030063212616, "clip_ratio/region_mean": 0.003198673941369634, "epoch": 0.14541296980090884, "grad_norm": 2.0807580947875977, "learning_rate": 2e-07, "loss": 0.0463, "step": 1558 }, { "clip_ratio/high_max": 0.0017299756855209125, "clip_ratio/high_mean": 0.0005205387569731101, "clip_ratio/low_mean": 0.0018997694769495865, "clip_ratio/low_min": 0.0001109719260057318, "clip_ratio/region_mean": 0.002420308239379665, "epoch": 0.1455063029009094, "grad_norm": 52.57670974731445, "learning_rate": 2e-07, "loss": 0.0853, "step": 1559 }, { "clip_ratio/high_max": 0.0033661488560028374, "clip_ratio/high_mean": 0.0008800770328889485, "clip_ratio/low_mean": 0.0021588474555755965, "clip_ratio/low_min": 0.00014527435632771812, "clip_ratio/region_mean": 0.0030389244930120185, "epoch": 0.14559963600091, "grad_norm": 6.196171760559082, "learning_rate": 2e-07, "loss": 0.0412, "step": 1560 }, { "clip_ratio/high_max": 0.0015093350666575134, "clip_ratio/high_mean": 0.0003557764500783378, "clip_ratio/low_mean": 0.002905678973547765, "clip_ratio/low_min": 0.00011970164268859662, "clip_ratio/region_mean": 0.003261455456595286, "epoch": 0.1456929691009106, "grad_norm": 102.10365295410156, "learning_rate": 2e-07, "loss": 0.0651, "step": 1561 }, { "clip_ratio/high_max": 0.002019521431066096, "clip_ratio/high_mean": 0.0005306261755322339, "clip_ratio/low_mean": 0.0019967839143646415, "clip_ratio/low_min": 0.00013193928316468373, "clip_ratio/region_mean": 0.002527410128095653, "epoch": 0.14578630220091116, "grad_norm": 4.868849754333496, "learning_rate": 2e-07, "loss": 0.0636, "step": 1562 }, { "clip_ratio/high_max": 0.006034212074155221, "clip_ratio/high_mean": 0.001083312703485717, "clip_ratio/low_mean": 0.002647472134412965, "clip_ratio/low_min": 4.128280852455646e-05, "clip_ratio/region_mean": 0.0037307847742340527, "epoch": 0.14587963530091175, "grad_norm": 15.132116317749023, "learning_rate": 2e-07, "loss": 0.0588, "step": 1563 }, { "clip_ratio/high_max": 0.003912050975486636, "clip_ratio/high_mean": 0.0008249418997365865, "clip_ratio/low_mean": 0.003138151139864931, "clip_ratio/low_min": 0.00023534113279310986, "clip_ratio/region_mean": 0.00396309289499186, "epoch": 0.14597296840091234, "grad_norm": 7.849503517150879, "learning_rate": 2e-07, "loss": 0.0895, "step": 1564 }, { "clip_ratio/high_max": 0.002090877253067447, "clip_ratio/high_mean": 0.0005327249273250345, "clip_ratio/low_mean": 0.0030958140960137825, "clip_ratio/low_min": 0.00013347401909413747, "clip_ratio/region_mean": 0.0036285390306147747, "epoch": 0.1460663015009129, "grad_norm": 204.26678466796875, "learning_rate": 2e-07, "loss": 0.0808, "step": 1565 }, { "clip_ratio/high_max": 0.0016339678209078556, "clip_ratio/high_mean": 0.00039930258117237827, "clip_ratio/low_mean": 0.002258690699818544, "clip_ratio/low_min": 5.310110282152891e-05, "clip_ratio/region_mean": 0.0026579932928143535, "epoch": 0.1461596346009135, "grad_norm": 42.09465789794922, "learning_rate": 2e-07, "loss": 0.0931, "step": 1566 }, { "clip_ratio/high_max": 0.002933861269866611, "clip_ratio/high_mean": 0.0006801888171139581, "clip_ratio/low_mean": 0.0025214328852598555, "clip_ratio/low_min": 0.0001578905362293881, "clip_ratio/region_mean": 0.003201621620974038, "epoch": 0.1462529677009141, "grad_norm": 3.0651299953460693, "learning_rate": 2e-07, "loss": 0.0722, "step": 1567 }, { "clip_ratio/high_max": 0.002410534302725864, "clip_ratio/high_mean": 0.0005788882303932041, "clip_ratio/low_mean": 0.0031551166030112654, "clip_ratio/low_min": 0.00013254908481030725, "clip_ratio/region_mean": 0.0037340048293117434, "epoch": 0.14634630080091465, "grad_norm": 6.625837326049805, "learning_rate": 2e-07, "loss": 0.1021, "step": 1568 }, { "clip_ratio/high_max": 0.002388019311183598, "clip_ratio/high_mean": 0.0005707476607312856, "clip_ratio/low_mean": 0.00233959115575999, "clip_ratio/low_min": 0.0001351745495412615, "clip_ratio/region_mean": 0.00291033888788661, "epoch": 0.14643963390091524, "grad_norm": 3.8021035194396973, "learning_rate": 2e-07, "loss": 0.0778, "step": 1569 }, { "clip_ratio/high_max": 0.003512184070132207, "clip_ratio/high_mean": 0.0007597188905492658, "clip_ratio/low_mean": 0.003070260729145957, "clip_ratio/low_min": 5.859381963091437e-05, "clip_ratio/region_mean": 0.003829979556030594, "epoch": 0.14653296700091584, "grad_norm": 123.35625457763672, "learning_rate": 2e-07, "loss": 0.0584, "step": 1570 }, { "clip_ratio/high_max": 0.002425365222734399, "clip_ratio/high_mean": 0.0006154531547508668, "clip_ratio/low_mean": 0.0031387264243676327, "clip_ratio/low_min": 0.00019874318604706787, "clip_ratio/region_mean": 0.0037541796336881816, "epoch": 0.1466263001009164, "grad_norm": 4.483541488647461, "learning_rate": 2e-07, "loss": 0.0511, "step": 1571 }, { "clip_ratio/high_max": 0.003263784194132313, "clip_ratio/high_mean": 0.0007810114752828667, "clip_ratio/low_mean": 0.0031168496534519363, "clip_ratio/low_min": 0.0004441272139956709, "clip_ratio/region_mean": 0.0038978611773927696, "epoch": 0.146719633200917, "grad_norm": 4.892777919769287, "learning_rate": 2e-07, "loss": 0.0801, "step": 1572 }, { "clip_ratio/high_max": 0.0026380216659163125, "clip_ratio/high_mean": 0.000684087528497912, "clip_ratio/low_mean": 0.00307053659344092, "clip_ratio/low_min": 4.470992280403152e-05, "clip_ratio/region_mean": 0.003754624216526281, "epoch": 0.14681296630091759, "grad_norm": 7.348733425140381, "learning_rate": 2e-07, "loss": 0.0826, "step": 1573 }, { "clip_ratio/high_max": 0.0025824330978139187, "clip_ratio/high_mean": 0.0006363135275933018, "clip_ratio/low_mean": 0.0026088758822879754, "clip_ratio/low_min": 8.612224155513104e-05, "clip_ratio/region_mean": 0.0032451892984681763, "epoch": 0.14690629940091818, "grad_norm": 4.69542932510376, "learning_rate": 2e-07, "loss": 0.1001, "step": 1574 }, { "clip_ratio/high_max": 0.0030242331558838487, "clip_ratio/high_mean": 0.0007134772458812222, "clip_ratio/low_mean": 0.0023354432778432965, "clip_ratio/low_min": 0.00026018249400294735, "clip_ratio/region_mean": 0.0030489204509649426, "epoch": 0.14699963250091874, "grad_norm": 7.368109703063965, "learning_rate": 2e-07, "loss": 0.0394, "step": 1575 }, { "clip_ratio/high_max": 0.0013022861121498863, "clip_ratio/high_mean": 0.0003627282846991875, "clip_ratio/low_mean": 0.00254511278035352, "clip_ratio/low_min": 0.00014610993275709916, "clip_ratio/region_mean": 0.002907841029809788, "epoch": 0.14709296560091933, "grad_norm": 5.674394130706787, "learning_rate": 2e-07, "loss": 0.0622, "step": 1576 }, { "clip_ratio/high_max": 0.002939444062576513, "clip_ratio/high_mean": 0.0007147967187393078, "clip_ratio/low_mean": 0.002912866257247515, "clip_ratio/low_min": 0.00011562355393834878, "clip_ratio/region_mean": 0.0036276629616622813, "epoch": 0.14718629870091993, "grad_norm": 7.482340335845947, "learning_rate": 2e-07, "loss": 0.0963, "step": 1577 }, { "clip_ratio/high_max": 0.002801228401949629, "clip_ratio/high_mean": 0.000751033802771417, "clip_ratio/low_mean": 0.0030029545741854236, "clip_ratio/low_min": 0.00020934944950568024, "clip_ratio/region_mean": 0.003753988457901869, "epoch": 0.1472796318009205, "grad_norm": 3.5337891578674316, "learning_rate": 2e-07, "loss": 0.0988, "step": 1578 }, { "clip_ratio/high_max": 0.00455448809225345, "clip_ratio/high_mean": 0.000937264690946904, "clip_ratio/low_mean": 0.0030893527618900407, "clip_ratio/low_min": 5.560707541008014e-05, "clip_ratio/region_mean": 0.004026617461931892, "epoch": 0.14737296490092108, "grad_norm": 4.725992202758789, "learning_rate": 2e-07, "loss": 0.0487, "step": 1579 }, { "clip_ratio/high_max": 0.004764191844515153, "clip_ratio/high_mean": 0.0010451707712491043, "clip_ratio/low_mean": 0.0036872572964057326, "clip_ratio/low_min": 0.0001870347732619848, "clip_ratio/region_mean": 0.0047324280749307945, "epoch": 0.14746629800092168, "grad_norm": 3.6773030757904053, "learning_rate": 2e-07, "loss": 0.0794, "step": 1580 }, { "clip_ratio/high_max": 0.006270758145547006, "clip_ratio/high_mean": 0.001358843494017492, "clip_ratio/low_mean": 0.0035475709082675166, "clip_ratio/low_min": 0.00011536174497450702, "clip_ratio/region_mean": 0.004906414425931871, "epoch": 0.14755963110092224, "grad_norm": 3.13504695892334, "learning_rate": 2e-07, "loss": 0.0491, "step": 1581 }, { "clip_ratio/high_max": 0.0037175638135522604, "clip_ratio/high_mean": 0.0009229039915226167, "clip_ratio/low_mean": 0.002810977886838373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003733881821972318, "epoch": 0.14765296420092283, "grad_norm": 6.711385726928711, "learning_rate": 2e-07, "loss": 0.1064, "step": 1582 }, { "clip_ratio/high_max": 0.0032769189565442502, "clip_ratio/high_mean": 0.0007121384387573926, "clip_ratio/low_mean": 0.0023528121964773163, "clip_ratio/low_min": 1.120540218835231e-05, "clip_ratio/region_mean": 0.0030649506152258255, "epoch": 0.14774629730092342, "grad_norm": 2.568202495574951, "learning_rate": 2e-07, "loss": 0.0369, "step": 1583 }, { "clip_ratio/high_max": 0.0038418287876993418, "clip_ratio/high_mean": 0.0008206715929190977, "clip_ratio/low_mean": 0.0030159057168930303, "clip_ratio/low_min": 6.879719876451418e-05, "clip_ratio/region_mean": 0.003836577307083644, "epoch": 0.147839630400924, "grad_norm": 2.4861927032470703, "learning_rate": 2e-07, "loss": 0.0732, "step": 1584 }, { "clip_ratio/high_max": 0.004801132787179085, "clip_ratio/high_mean": 0.0011252016811340582, "clip_ratio/low_mean": 0.004052828982821666, "clip_ratio/low_min": 0.00019886954305547988, "clip_ratio/region_mean": 0.005178030703973491, "epoch": 0.14793296350092458, "grad_norm": 16.173974990844727, "learning_rate": 2e-07, "loss": 0.084, "step": 1585 }, { "clip_ratio/high_max": 0.004438998192199506, "clip_ratio/high_mean": 0.0009170644143523532, "clip_ratio/low_mean": 0.003814064199104905, "clip_ratio/low_min": 0.00025108792033279315, "clip_ratio/region_mean": 0.004731128712592181, "epoch": 0.14802629660092517, "grad_norm": 3.275059461593628, "learning_rate": 2e-07, "loss": 0.043, "step": 1586 }, { "clip_ratio/high_max": 0.0033473419989604736, "clip_ratio/high_mean": 0.000780729805626379, "clip_ratio/low_mean": 0.0033988386785495095, "clip_ratio/low_min": 0.00016384675109293312, "clip_ratio/region_mean": 0.0041795684228418395, "epoch": 0.14811962970092574, "grad_norm": 1.7428197860717773, "learning_rate": 2e-07, "loss": 0.1016, "step": 1587 }, { "clip_ratio/high_max": 0.0035803153514279984, "clip_ratio/high_mean": 0.0007860034975237795, "clip_ratio/low_mean": 0.003742596185475122, "clip_ratio/low_min": 0.00025756657851161435, "clip_ratio/region_mean": 0.004528599682089407, "epoch": 0.14821296280092633, "grad_norm": 2.5137462615966797, "learning_rate": 2e-07, "loss": 0.098, "step": 1588 }, { "clip_ratio/high_max": 0.003256996857089689, "clip_ratio/high_mean": 0.0006716904331369733, "clip_ratio/low_mean": 0.002610788964375388, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0032824793743202463, "epoch": 0.14830629590092692, "grad_norm": 1.6252750158309937, "learning_rate": 2e-07, "loss": 0.066, "step": 1589 }, { "clip_ratio/high_max": 0.004591283850459149, "clip_ratio/high_mean": 0.000953434922962515, "clip_ratio/low_mean": 0.004615034311427735, "clip_ratio/low_min": 0.00024176068473025225, "clip_ratio/region_mean": 0.005568469190620817, "epoch": 0.14839962900092749, "grad_norm": 4.403492450714111, "learning_rate": 2e-07, "loss": 0.1163, "step": 1590 }, { "clip_ratio/high_max": 0.0022798290501668816, "clip_ratio/high_mean": 0.0005366639288695296, "clip_ratio/low_mean": 0.004208072023175191, "clip_ratio/low_min": 0.000287162241875194, "clip_ratio/region_mean": 0.004744735975691583, "epoch": 0.14849296210092808, "grad_norm": 9.639986038208008, "learning_rate": 2e-07, "loss": 0.1163, "step": 1591 }, { "clip_ratio/high_max": 0.0034552405231806915, "clip_ratio/high_mean": 0.0009680329421826173, "clip_ratio/low_mean": 0.004152048415562604, "clip_ratio/low_min": 3.4059947211062536e-05, "clip_ratio/region_mean": 0.0051200813613832, "epoch": 0.14858629520092867, "grad_norm": 4.676005840301514, "learning_rate": 2e-07, "loss": 0.0871, "step": 1592 }, { "clip_ratio/high_max": 0.0047938966781657655, "clip_ratio/high_mean": 0.0010236321704724105, "clip_ratio/low_mean": 0.005095163120131474, "clip_ratio/low_min": 0.00017800947671275935, "clip_ratio/region_mean": 0.006118795368820429, "epoch": 0.14867962830092923, "grad_norm": 2.9512834548950195, "learning_rate": 2e-07, "loss": 0.0628, "step": 1593 }, { "clip_ratio/high_max": 0.0031649012526031584, "clip_ratio/high_mean": 0.0008400449357850448, "clip_ratio/low_mean": 0.004872725396126043, "clip_ratio/low_min": 0.0001022861288220156, "clip_ratio/region_mean": 0.005712770340323914, "epoch": 0.14877296140092983, "grad_norm": 8.021714210510254, "learning_rate": 2e-07, "loss": 0.0876, "step": 1594 }, { "clip_ratio/high_max": 0.0056581776443636045, "clip_ratio/high_mean": 0.0012215650854159321, "clip_ratio/low_mean": 0.0033353065300616436, "clip_ratio/low_min": 4.075644028489478e-05, "clip_ratio/region_mean": 0.004556871608656365, "epoch": 0.14886629450093042, "grad_norm": 3.4012291431427, "learning_rate": 2e-07, "loss": 0.0767, "step": 1595 }, { "clip_ratio/high_max": 0.003456940128671704, "clip_ratio/high_mean": 0.0007819540712716844, "clip_ratio/low_mean": 0.0032705844787415117, "clip_ratio/low_min": 8.14315408206312e-05, "clip_ratio/region_mean": 0.004052538693940733, "epoch": 0.148959627600931, "grad_norm": 3.441681385040283, "learning_rate": 2e-07, "loss": 0.0863, "step": 1596 }, { "clip_ratio/high_max": 0.003477719430520665, "clip_ratio/high_mean": 0.0009134169715707685, "clip_ratio/low_mean": 0.0035796952215605415, "clip_ratio/low_min": 5.256518124951981e-05, "clip_ratio/region_mean": 0.00449311213742476, "epoch": 0.14905296070093157, "grad_norm": 27.548805236816406, "learning_rate": 2e-07, "loss": 0.0395, "step": 1597 }, { "clip_ratio/high_max": 0.004061659870785661, "clip_ratio/high_mean": 0.0010525919406063622, "clip_ratio/low_mean": 0.004888974126515677, "clip_ratio/low_min": 4.891526805295143e-05, "clip_ratio/region_mean": 0.005941566196270287, "epoch": 0.14914629380093217, "grad_norm": 54.571407318115234, "learning_rate": 2e-07, "loss": 0.1003, "step": 1598 }, { "clip_ratio/high_max": 0.0036978577336412854, "clip_ratio/high_mean": 0.0009518274719084729, "clip_ratio/low_mean": 0.003909374128852505, "clip_ratio/low_min": 9.167911139229545e-05, "clip_ratio/region_mean": 0.004861201552557759, "epoch": 0.14923962690093276, "grad_norm": 5.038246154785156, "learning_rate": 2e-07, "loss": 0.0701, "step": 1599 }, { "clip_ratio/high_max": 0.004392796420688683, "clip_ratio/high_mean": 0.0010297591570633813, "clip_ratio/low_mean": 0.004019362997496501, "clip_ratio/low_min": 8.042318040679675e-05, "clip_ratio/region_mean": 0.005049122250056826, "epoch": 0.14933296000093332, "grad_norm": 3.5350348949432373, "learning_rate": 2e-07, "loss": 0.1024, "step": 1600 }, { "epoch": 0.14933296000093332, "step": 1600, "total_flos": 0.0, "train_loss": 69.64168917144616, "train_runtime": 28121.2965, "train_samples_per_second": 50.979, "train_steps_per_second": 0.057 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 1136645105, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }